framework,version,device,op_name,kernel_source,model,architecture,mla_dtype,kv_cache_dtype,gemm_type,num_heads,batch_size,isl,tp_size,step,latency
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,16,1,0,0.1621
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,0,0.1817
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,32,1,0,0.1511
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1536,1,0,1.1248
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,4096,1,0,4.0360
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,128,1,0,0.1830
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,3072,1,0,2.9681
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1024,1,0,0.7284
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,6144,1,0,6.2508
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,10240,1,0,10.6966
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,12288,1,0,13.4217
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,64,1,0,0.1567
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,2048,1,0,1.5817
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,16384,1,0,18.9055
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,8192,1,0,8.4029
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,32,1,0,0.1574
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,64,1,0,0.1847
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,16,1,0,0.1519
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,0,0.1887
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,32768,1,0,41.0848
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,256,1,0,0.3585
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,512,1,0,0.3719
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,512,1,0,0.6670
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1024,1,0,1.3592
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,2048,1,0,3.1135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1536,1,0,2.1507
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,3072,1,0,5.6535
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,256,1,0,0.2502
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,4096,1,0,7.8690
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,6144,1,0,12.3680
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,128,1,0,0.2437
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,8192,1,0,17.1909
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,10240,1,0,21.9668
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,16,1,0,0.1576
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,12288,1,0,26.6133
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,64,1,0,0.2448
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,128,1,0,0.3489
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,16384,1,0,37.4985
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,32,1,0,0.1818
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,256,1,0,0.6397
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,512,1,0,1.2432
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,0,0.1853
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1024,1,0,2.6098
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1536,1,0,4.2164
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,2048,1,0,6.2148
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,3072,1,0,11.1445
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,32768,1,0,83.2320
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,4096,1,0,15.6388
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,6144,1,0,24.9665
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,8192,1,0,34.5972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,10240,1,0,45.1216
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,16,1,0,0.1848
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,32,1,0,0.2449
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1,1,0,0.1607
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,12288,1,0,54.9438
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,64,1,0,0.3484
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,128,1,0,0.6292
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,256,1,0,1.1861
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,16384,1,0,75.9527
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,512,1,0,2.3752
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1024,1,0,5.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,2048,1,0,12.5137
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,3072,1,0,22.4574
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1536,1,0,8.5397
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,4096,1,0,31.1980
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1,1,0,0.1626
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,6144,1,0,50.4765
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,32768,1,0,166.4409
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,16,1,0,0.2474
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,32,1,0,0.3504
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,8192,1,0,70.5683
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,64,1,0,0.6295
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,10240,1,0,90.0516
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,128,1,0,1.1607
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,256,1,0,2.2717
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,512,1,0,4.5662
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1024,1,0,10.4568
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,12288,1,0,113.2653
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1536,1,0,17.5963
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,2048,1,0,25.3846
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1,1,0,0.1564
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,16,1,0,0.3511
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,16384,1,0,155.3197
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,32,1,0,0.6264
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,3072,1,0,46.4254
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,64,1,0,1.1665
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,128,1,0,2.2197
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,4096,1,0,64.3033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,512,1,0,9.3071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,6144,1,0,100.4373
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,256,1,0,4.3870
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1024,1,0,21.6158
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1,1,0,0.1706
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1536,1,0,35.5887
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,32,1,0,1.1685
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,16,1,0,0.6342
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,8192,1,0,139.9569
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,2048,1,0,52.3737
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,64,1,0,2.2278
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,128,1,0,4.2674
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,256,1,0,8.9415
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,512,1,0,19.7010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,3072,1,0,91.9119
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,1,1,0,0.2077
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,16,1,0,1.1790
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,4096,1,0,128.9101
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,32,1,0,2.2224
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1024,1,0,44.0897
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,64,1,0,4.2792
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,128,1,0,8.6883
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1536,1,0,73.5069
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,256,1,0,18.3826
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,16,1,0,2.2497
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,1,1,0,0.2991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,64,1,0,8.8734
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,512,1,0,40.0673
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,2048,1,0,106.7522
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,32,1,0,4.2777
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,128,1,0,18.0193
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,256,1,0,37.0012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,1024,1,0,88.7479
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,512,1,0,79.9997
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,16,1,0,0.1357
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,64,1,0,0.1343
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,32,1,0,0.1245
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,128,1,0,0.1587
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,256,1,0,0.2300
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,0,0.1490
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,512,1,0,0.3958
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1024,1,0,0.8701
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1536,1,0,1.5563
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,2048,1,0,2.4275
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,8192,1,0,14.0167
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,3072,1,0,4.6945
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,10240,1,0,18.2446
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,4096,1,0,6.6338
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,12288,1,0,22.3431
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,0,0.1583
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,16,1,0,0.1248
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,16384,1,0,30.5000
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,6144,1,0,10.2927
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,32,1,0,0.1344
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,64,1,0,0.1574
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,128,1,0,0.2165
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,256,1,0,0.3520
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,512,1,0,0.6882
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,32768,1,0,65.4012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1536,1,0,3.0726
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,2048,1,0,4.8556
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1024,1,0,1.7118
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,3072,1,0,9.1936
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,6144,1,0,20.7627
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,4096,1,0,13.0259
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,0,0.1524
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,8192,1,0,28.9375
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,16,1,0,0.1357
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,32,1,0,0.1573
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,10240,1,0,36.7280
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,16384,1,0,61.2220
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,128,1,0,0.3252
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,12288,1,0,44.7690
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,256,1,0,0.5962
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,64,1,0,0.2162
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,512,1,0,1.3592
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1024,1,0,3.4045
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1536,1,0,6.2033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,3072,1,0,18.3386
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,2048,1,0,9.6484
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,4096,1,0,26.2309
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,6144,1,0,41.5835
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,32768,1,0,131.2956
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,16,1,0,0.1583
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,0,0.1479
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,8192,1,0,57.8322
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,32,1,0,0.2162
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,10240,1,0,74.2787
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,64,1,0,0.3263
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,128,1,0,0.5483
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,256,1,0,1.1549
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,12288,1,0,90.8300
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,512,1,0,2.7071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1024,1,0,6.6675
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1536,1,0,12.5249
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,16384,1,0,123.5513
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,2048,1,0,19.6105
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,3072,1,0,36.0891
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,4096,1,0,52.1190
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1,1,0,0.1364
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,6144,1,0,84.0679
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,16,1,0,0.2177
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,32,1,0,0.3262
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,64,1,0,0.5485
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,8192,1,0,116.5599
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,128,1,0,1.0618
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,256,1,0,2.2545
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,32768,1,0,264.8820
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,512,1,0,5.2669
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,10240,1,0,148.9313
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1024,1,0,13.5031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1536,1,0,25.2426
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,2048,1,0,39.5446
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,12288,1,0,182.0507
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1,1,0,0.1278
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,16,1,0,0.3273
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,32,1,0,0.5481
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,3072,1,0,72.5711
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,64,1,0,1.0652
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,128,1,0,2.0691
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,256,1,0,4.3492
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,16384,1,0,249.8786
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,4096,1,0,104.4920
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,512,1,0,10.7546
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1024,1,0,27.4287
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1,1,0,0.1419
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,16,1,0,0.5531
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,6144,1,0,168.6112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1536,1,0,51.0665
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,32,1,0,1.0610
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,64,1,0,2.0651
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,2048,1,0,79.5335
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,256,1,0,8.8107
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,128,1,0,3.9516
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,8192,1,0,233.6761
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,512,1,0,21.9896
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,1,1,0,0.1692
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,16,1,0,1.0703
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,3072,1,0,145.6307
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,32,1,0,2.0755
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1024,1,0,55.2017
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,64,1,0,3.9510
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,128,1,0,8.0179
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,256,1,0,17.9872
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,1,1,0,0.2428
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,16,1,0,2.0764
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1536,1,0,102.1177
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,4096,1,0,210.5576
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,512,1,0,44.5130
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,32,1,0,3.9563
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,64,1,0,8.0227
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,128,1,0,16.3736
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,2048,1,0,160.3460
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,256,1,0,36.4117
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,1024,1,0,110.8832
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,512,1,0,90.0265
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,16,1,0,0.1280
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,32,1,0,0.1072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,0,0.1173
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,64,1,0,0.1135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,128,1,0,0.1307
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,256,1,0,0.1891
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,512,1,0,0.3080
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1024,1,0,0.6104
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1536,1,0,0.9974
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,2048,1,0,1.4968
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,3072,1,0,2.9441
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,4096,1,0,4.2234
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,6144,1,0,6.3704
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,12288,1,0,13.7751
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,0,0.1257
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,16384,1,0,18.6106
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,16,1,0,0.1076
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,8192,1,0,8.5583
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,32,1,0,0.1143
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,64,1,0,0.1312
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,32768,1,0,40.8598
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,10240,1,0,11.1732
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,128,1,0,0.1786
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,256,1,0,0.2838
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1024,1,0,1.1105
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,512,1,0,0.5177
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1536,1,0,1.9489
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,3072,1,0,5.7630
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,2048,1,0,2.9982
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,6144,1,0,12.5994
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,4096,1,0,8.0796
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,12288,1,0,27.3170
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,10240,1,0,22.1348
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,16,1,0,0.1156
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,16384,1,0,37.6614
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,8192,1,0,17.4205
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,0,0.1256
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,32,1,0,0.1312
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,64,1,0,0.1794
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,128,1,0,0.2711
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,512,1,0,0.9380
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1536,1,0,3.9545
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1024,1,0,2.2575
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,2048,1,0,5.9056
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,256,1,0,0.4725
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,32768,1,0,82.5367
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,3072,1,0,11.4476
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,6144,1,0,25.2327
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,4096,1,0,16.0583
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,0,0.1214
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,8192,1,0,35.0631
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,16,1,0,0.1339
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,32,1,0,0.1810
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,64,1,0,0.2714
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,12288,1,0,55.0288
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,10240,1,0,44.8243
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,128,1,0,0.4463
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,256,1,0,0.8455
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,512,1,0,1.8831
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,16384,1,0,75.7130
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1024,1,0,4.4357
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,2048,1,0,12.0421
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1536,1,0,7.9576
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,4096,1,0,31.8201
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,3072,1,0,22.3289
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,0,0.1321
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,8192,1,0,70.6330
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,6144,1,0,50.9334
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,16,1,0,0.1805
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,32,1,0,0.2708
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,32768,1,0,165.5993
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,10240,1,0,89.9351
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,64,1,0,0.4481
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,256,1,0,1.6896
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,128,1,0,0.8001
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,512,1,0,3.7200
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1024,1,0,9.0131
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,12288,1,0,111.0252
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1536,1,0,16.1632
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,16384,1,0,152.6458
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1,1,0,0.1072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,2048,1,0,24.4986
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,3072,1,0,44.9101
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,16,1,0,0.2719
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,32,1,0,0.4466
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,128,1,0,1.5902
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,64,1,0,0.7983
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,4096,1,0,63.8409
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,512,1,0,7.6739
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,256,1,0,3.2826
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1024,1,0,18.3963
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1536,1,0,32.6145
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1,1,0,0.1177
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,6144,1,0,102.1553
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,16,1,0,0.4486
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,8192,1,0,142.0981
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,2048,1,0,49.5696
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,32,1,0,0.7956
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,128,1,0,3.1152
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,256,1,0,6.6765
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,3072,1,0,89.7401
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,512,1,0,15.7219
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,64,1,0,1.5891
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1,1,0,0.1388
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1024,1,0,37.2757
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,32,1,0,1.5904
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,64,1,0,3.0938
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,16,1,0,0.8022
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,4096,1,0,128.4364
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,128,1,0,6.3019
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1536,1,0,65.4350
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,256,1,0,13.7215
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,16,1,0,1.5935
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,1,1,0,0.1938
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,64,1,0,6.3030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,512,1,0,31.7574
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,2048,1,0,99.3526
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,32,1,0,3.0929
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,128,1,0,12.9621
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,256,1,0,27.6401
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1024,1,0,74.4182
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,512,1,0,63.5872
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,16,1,0,0.1136
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,32,1,0,0.1043
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,64,1,0,0.1048
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,0,0.1092
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,128,1,0,0.1136
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,256,1,0,0.1602
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,512,1,0,0.2588
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,3072,1,0,2.1292
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,4096,1,0,2.9054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,8192,1,0,5.8573
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1024,1,0,0.4804
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1536,1,0,0.7405
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,2048,1,0,1.0458
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,0,0.1195
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,16,1,0,0.1034
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,6144,1,0,4.2589
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,32,1,0,0.1030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,64,1,0,0.1173
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,10240,1,0,7.6707
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,12288,1,0,9.4957
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,128,1,0,0.1564
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,256,1,0,0.2447
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,16384,1,0,12.8799
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,512,1,0,0.4329
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1024,1,0,0.8555
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1536,1,0,1.3887
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,2048,1,0,2.0146
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,3072,1,0,3.9845
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,32768,1,0,28.9372
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,8192,1,0,11.9739
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,4096,1,0,5.6219
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,12288,1,0,18.6339
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,6144,1,0,8.6724
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,10240,1,0,15.1378
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,16,1,0,0.1036
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,0,0.1215
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,32,1,0,0.1153
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,16384,1,0,25.7886
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,64,1,0,0.1569
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,256,1,0,0.4053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,128,1,0,0.2378
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1024,1,0,1.6293
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,2048,1,0,4.0406
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,3072,1,0,8.0612
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1536,1,0,2.7287
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,4096,1,0,11.0403
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,32768,1,0,58.1162
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,512,1,0,0.7615
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,8192,1,0,23.6541
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,6144,1,0,17.1511
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,0,0.1133
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,10240,1,0,30.5547
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,16,1,0,0.1154
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,32,1,0,0.1563
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,12288,1,0,37.5649
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,64,1,0,0.2368
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,16384,1,0,52.1939
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,128,1,0,0.3921
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,256,1,0,0.7119
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,512,1,0,1.4395
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1024,1,0,3.3107
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1536,1,0,5.6956
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,2048,1,0,8.3837
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,4096,1,0,21.6590
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,6144,1,0,34.5170
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,32768,1,0,117.0409
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,0,0.1156
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,3072,1,0,15.4544
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,8192,1,0,47.9446
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,16,1,0,0.1564
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,10240,1,0,60.8609
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,32,1,0,0.2379
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,64,1,0,0.3931
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,128,1,0,0.6900
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,256,1,0,1.3524
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,12288,1,0,75.2993
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,512,1,0,2.9263
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1024,1,0,6.8179
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1536,1,0,11.5959
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,16384,1,0,104.8388
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1,1,0,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,3072,1,0,31.2613
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,2048,1,0,17.0229
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,16,1,0,0.2394
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,64,1,0,0.6892
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,128,1,0,1.3026
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,32,1,0,0.3922
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,6144,1,0,69.1494
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,256,1,0,2.7209
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,4096,1,0,43.7208
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,512,1,0,6.0645
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1024,1,0,13.9984
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,8192,1,0,96.5434
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1536,1,0,23.6492
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1,1,0,0.1055
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,16,1,0,0.3932
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,2048,1,0,34.5805
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,32,1,0,0.6885
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,64,1,0,1.2999
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,128,1,0,2.6283
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,256,1,0,5.6698
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,3072,1,0,62.1945
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,512,1,0,12.6296
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,4096,1,0,87.9187
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1,1,0,0.1195
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,16,1,0,0.6927
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1536,1,0,47.3211
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,32,1,0,1.3047
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1024,1,0,28.2457
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,64,1,0,2.6349
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,128,1,0,5.4536
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,1,1,0,0.1646
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,2048,1,0,68.9715
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,32,1,0,2.6394
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,512,1,0,25.5970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,64,1,0,5.4566
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,16,1,0,1.2949
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,128,1,0,11.1717
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,256,1,0,11.6198
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1024,1,0,56.7190
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,256,1,0,23.2647
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,512,1,0,51.3334
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,16,1,0,0.1130
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,32,1,0,0.1013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,128,1,0,0.1135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,64,1,0,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,256,1,0,0.1529
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,512,1,0,0.2433
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,0,0.1046
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1024,1,0,0.4549
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1536,1,0,0.6961
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,2048,1,0,0.9708
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,3072,1,0,1.9563
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,4096,1,0,2.7106
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,6144,1,0,3.8400
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,8192,1,0,5.2453
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,0,0.1152
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,16384,1,0,11.9970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,16,1,0,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,10240,1,0,6.8460
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,12288,1,0,8.6313
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,32,1,0,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,64,1,0,0.1134
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,256,1,0,0.2342
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,128,1,0,0.1478
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1024,1,0,0.8089
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,512,1,0,0.4116
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1536,1,0,1.2927
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,3072,1,0,3.6665
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,4096,1,0,5.0507
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,2048,1,0,1.8527
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,6144,1,0,7.8429
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,32768,1,0,26.9817
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,8192,1,0,11.0888
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,10240,1,0,14.0370
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,0,0.1156
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,16384,1,0,23.8730
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,12288,1,0,17.3826
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,32,1,0,0.1192
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,16,1,0,0.1028
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,256,1,0,0.3891
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,64,1,0,0.1479
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,128,1,0,0.2282
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1024,1,0,1.5356
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,512,1,0,0.7280
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,32768,1,0,54.3455
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1536,1,0,2.4768
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,2048,1,0,3.5409
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,4096,1,0,10.2261
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,3072,1,0,7.3063
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,10240,1,0,28.3584
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,6144,1,0,15.7681
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,0,0.1135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,12288,1,0,34.9097
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,16,1,0,0.1134
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,8192,1,0,21.9185
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,32,1,0,0.1529
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,64,1,0,0.2282
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,128,1,0,0.3759
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1024,1,0,2.8999
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,256,1,0,0.6955
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,512,1,0,1.3757
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1536,1,0,5.1947
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,16384,1,0,48.4576
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,2048,1,0,7.6890
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,3072,1,0,14.3124
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,32768,1,0,109.1301
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,6144,1,0,32.0224
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,4096,1,0,20.2126
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,0,0.1138
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,8192,1,0,44.4234
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,16,1,0,0.1466
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,64,1,0,0.3768
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,32,1,0,0.2276
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,10240,1,0,56.6813
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,128,1,0,0.6738
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,256,1,0,1.2919
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1024,1,0,6.4195
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,512,1,0,2.5911
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,12288,1,0,70.0072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1536,1,0,10.8507
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,16384,1,0,97.6308
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,2048,1,0,15.8818
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1,1,0,0.0997
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,16,1,0,0.2282
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,4096,1,0,40.4563
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,64,1,0,0.6713
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,32,1,0,0.3772
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,3072,1,0,29.0524
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,128,1,0,1.2510
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,256,1,0,2.4098
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,6144,1,0,64.5102
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,512,1,0,5.7423
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1024,1,0,13.2465
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,8192,1,0,89.4708
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1536,1,0,22.2016
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1,1,0,0.1035
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,32,1,0,0.6648
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,2048,1,0,32.1938
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,16,1,0,0.3752
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,64,1,0,1.2552
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,128,1,0,2.3418
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,256,1,0,5.4059
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,3072,1,0,57.7421
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,4096,1,0,82.0138
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,16,1,0,0.6688
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,512,1,0,11.9955
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1024,1,0,26.7352
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,32,1,0,1.2446
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1536,1,0,44.5026
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1,1,0,0.1134
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,64,1,0,2.3241
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,128,1,0,5.2360
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,2048,1,0,64.4674
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,512,1,0,24.2808
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,1,1,0,0.1505
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,16,1,0,1.2551
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,64,1,0,5.2269
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,32,1,0,2.3264
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,256,1,0,11.1391
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1024,1,0,53.5746
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,128,1,0,10.8568
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,256,1,0,22.5669
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,512,1,0,49.0995
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,64,1,0,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,16,1,0,0.1102
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,32,1,0,0.0950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,256,1,0,0.1507
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,128,1,0,0.1118
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,1,1,0,0.0959
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,512,1,0,0.2434
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,1024,1,0,0.4504
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,1536,1,0,0.6856
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,2048,1,0,0.9636
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,3072,1,0,1.9547
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,4096,1,0,2.6726
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,6144,1,0,3.8127
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,8192,1,0,5.1785
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,1,1,0,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,16384,1,0,11.6955
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,10240,1,0,6.6763
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,12288,1,0,8.4071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,16,1,0,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,32,1,0,0.0995
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,64,1,0,0.1115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,32768,1,0,26.6841
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,128,1,0,0.1440
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,1024,1,0,0.8007
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,512,1,0,0.4055
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,256,1,0,0.2316
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,1536,1,0,1.2595
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,6144,1,0,7.6450
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,4096,1,0,4.9941
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,10240,1,0,13.5610
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,8192,1,0,10.8015
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,2048,1,0,1.8276
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,3072,1,0,3.6303
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,1,1,0,0.1135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,12288,1,0,16.8748
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,32,1,0,0.1113
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,16,1,0,0.0996
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,16384,1,0,23.2845
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,256,1,0,0.3860
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,64,1,0,0.1464
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,128,1,0,0.2245
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,512,1,0,0.7150
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,2048,1,0,3.4813
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,1536,1,0,2.4313
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,32768,1,0,53.8854
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,1024,1,0,1.5232
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,4096,1,0,9.8507
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,3072,1,0,7.1138
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,8192,1,0,21.6011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,1,1,0,0.1035
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,6144,1,0,15.3588
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,12288,1,0,34.1154
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,16,1,0,0.1115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,128,1,0,0.3707
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,10240,1,0,27.6734
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,32,1,0,0.1453
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,64,1,0,0.2232
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,256,1,0,0.6756
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,16384,1,0,47.7032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,512,1,0,1.3535
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,1024,1,0,2.8569
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,3072,1,0,13.9901
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,2048,1,0,7.4107
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,32768,1,0,108.4309
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,4096,1,0,19.6923
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,1536,1,0,4.9332
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,6144,1,0,31.5646
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,1,1,0,0.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,16,1,0,0.1440
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,32,1,0,0.2240
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,10240,1,0,55.6208
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,64,1,0,0.3721
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,128,1,0,0.6536
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,8192,1,0,43.6257
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,256,1,0,1.2771
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,12288,1,0,69.6489
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,1024,1,0,6.1168
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,512,1,0,2.5361
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,16384,1,0,96.5198
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,2048,1,0,15.4714
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,1536,1,0,10.4310
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,1,1,0,0.0954
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,4096,1,0,40.3014
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,16,1,0,0.2242
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,3072,1,0,28.4946
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,64,1,0,0.6528
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,32,1,0,0.3728
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,128,1,0,1.2298
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,512,1,0,5.4811
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,6144,1,0,63.9289
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,256,1,0,2.3770
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,1536,1,0,21.5715
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,2048,1,0,31.6252
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,1024,1,0,12.7422
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,1,1,0,0.0988
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,16,1,0,0.3700
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,8192,1,0,88.6377
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,32,1,0,0.6526
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,64,1,0,1.2289
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,256,1,0,5.1262
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,128,1,0,2.2920
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,3072,1,0,57.4304
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,512,1,0,11.5378
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,4096,1,0,81.6157
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,32,1,0,1.2405
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,1,1,0,0.1134
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,64,1,0,2.2828
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,1024,1,0,26.2091
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,128,1,0,4.9447
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,1536,1,0,43.8967
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,16,1,0,0.6560
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,2048,1,0,63.7726
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,256,1,0,10.7026
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,1,1,0,0.1462
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,512,1,0,23.9312
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,32,1,0,2.2969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,64,1,0,4.9491
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,128,1,0,10.3634
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,16,1,0,1.2344
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,1024,1,0,52.9167
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,256,1,0,22.0513
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,512,1,0,48.3819
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,16,1,0,0.1074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,32,1,0,0.0948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,1,1,0,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,128,1,0,0.1092
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,64,1,0,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,512,1,0,0.2426
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,256,1,0,0.1469
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,1536,1,0,0.6731
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,1024,1,0,0.4405
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,2048,1,0,0.9462
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,3072,1,0,1.9447
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,4096,1,0,2.6587
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,8192,1,0,5.0521
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,12288,1,0,8.1940
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,6144,1,0,3.8286
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,10240,1,0,6.6877
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,1,1,0,0.1070
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,16,1,0,0.0934
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,32768,1,0,26.0419
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,32,1,0,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,16384,1,0,11.5601
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,64,1,0,0.1097
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,128,1,0,0.1432
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,256,1,0,0.2235
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,512,1,0,0.3980
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,1536,1,0,1.2446
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,1024,1,0,0.7822
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,6144,1,0,7.4876
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,2048,1,0,1.7938
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,4096,1,0,4.9050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,3072,1,0,3.5703
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,1,1,0,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,12288,1,0,16.5771
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,10240,1,0,13.4842
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,16,1,0,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,32,1,0,0.1091
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,8192,1,0,10.5208
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,64,1,0,0.1443
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,16384,1,0,22.9945
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,256,1,0,0.3785
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,512,1,0,0.7012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,32768,1,0,52.7300
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,128,1,0,0.2194
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,1024,1,0,1.4787
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,2048,1,0,3.4142
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,1536,1,0,2.3784
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,6144,1,0,15.1855
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,3072,1,0,6.9719
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,1,1,0,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,12288,1,0,33.4401
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,8192,1,0,20.9999
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,16,1,0,0.1098
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,4096,1,0,9.6891
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,10240,1,0,27.1463
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,16384,1,0,46.3014
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,32,1,0,0.1419
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,64,1,0,0.2162
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,128,1,0,0.3636
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,256,1,0,0.6602
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,1536,1,0,4.7977
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,1024,1,0,2.7943
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,512,1,0,1.3191
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,3072,1,0,13.6382
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,2048,1,0,7.2266
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,4096,1,0,19.0369
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,32768,1,0,106.2992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,1,1,0,0.1055
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,6144,1,0,30.8243
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,16,1,0,0.1441
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,8192,1,0,42.4590
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,32,1,0,0.2201
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,10240,1,0,54.6035
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,64,1,0,0.3620
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,128,1,0,0.6346
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,12288,1,0,67.7366
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,256,1,0,1.2447
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,1536,1,0,10.1585
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,1024,1,0,5.8856
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,16384,1,0,94.3683
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,2048,1,0,15.0543
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,3072,1,0,27.7167
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,1,1,0,0.0974
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,512,1,0,2.4780
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,32,1,0,0.3633
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,4096,1,0,39.0046
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,16,1,0,0.2177
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,64,1,0,0.6353
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,128,1,0,1.1977
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,6144,1,0,62.1512
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,512,1,0,5.2937
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,256,1,0,2.3004
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,1024,1,0,12.3694
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,8192,1,0,86.1607
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,1536,1,0,20.7441
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,16,1,0,0.3636
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,2048,1,0,30.5425
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,1,1,0,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,32,1,0,0.6320
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,64,1,0,1.1930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,128,1,0,2.2183
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,256,1,0,4.9131
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,3072,1,0,55.5819
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,512,1,0,11.1614
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,1,1,0,0.1093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,1024,1,0,25.1569
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,4096,1,0,79.3773
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,16,1,0,0.6366
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,32,1,0,1.1969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,64,1,0,2.2366
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,128,1,0,4.6940
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,1536,1,0,41.9315
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,256,1,0,10.2178
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,2048,1,0,61.4330
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,1,1,0,0.1445
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,512,1,0,22.9068
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,16,1,0,1.2086
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,32,1,0,2.2192
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,64,1,0,4.7072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,1024,1,0,50.9542
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,128,1,0,9.8836
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,256,1,0,20.9593
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,512,1,0,45.8844
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,1,1,0,0.0993
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,32,1,0,0.0929
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,64,1,0,0.0975
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,16,1,0,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,128,1,0,0.1081
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,512,1,0,0.2358
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,256,1,0,0.1441
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,1024,1,0,0.4348
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,1536,1,0,0.6690
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,2048,1,0,0.9382
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,3072,1,0,1.9110
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,4096,1,0,2.6745
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,6144,1,0,3.7434
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,8192,1,0,5.0634
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,1,1,0,0.1075
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,10240,1,0,6.5599
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,16,1,0,0.0928
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,32,1,0,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,64,1,0,0.1083
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,256,1,0,0.2232
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,512,1,0,0.3936
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,12288,1,0,8.1890
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,128,1,0,0.1406
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,1024,1,0,0.7725
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,1536,1,0,1.2336
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,3072,1,0,3.5460
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,16384,1,0,11.3660
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,4096,1,0,4.8989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,2048,1,0,1.7928
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,32768,1,0,26.0111
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,8192,1,0,10.3620
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,6144,1,0,7.3438
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,10240,1,0,13.4111
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,12288,1,0,16.3488
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,16,1,0,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,64,1,0,0.1409
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,16384,1,0,22.9447
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,1,1,0,0.1036
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,128,1,0,0.2167
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,256,1,0,0.3709
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,32,1,0,0.1076
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,32768,1,0,52.7099
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,512,1,0,0.6905
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,1024,1,0,1.4598
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,1536,1,0,2.3671
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,3072,1,0,6.9012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,2048,1,0,3.3856
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,4096,1,0,9.5924
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,6144,1,0,15.0116
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,10240,1,0,26.9285
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,1,1,0,0.1026
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,16,1,0,0.1096
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,12288,1,0,33.1815
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,32,1,0,0.1403
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,8192,1,0,20.9308
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,16384,1,0,46.3313
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,64,1,0,0.2142
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,128,1,0,0.3578
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,256,1,0,0.6512
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,512,1,0,1.2992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,1536,1,0,4.7769
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,1024,1,0,2.7497
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,2048,1,0,7.0813
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,32768,1,0,105.4891
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,4096,1,0,18.9371
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,6144,1,0,30.4103
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,3072,1,0,13.6212
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,1,1,0,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,8192,1,0,42.1485
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,16,1,0,0.1409
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,32,1,0,0.2170
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,10240,1,0,54.3333
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,64,1,0,0.3591
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,128,1,0,0.6271
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,12288,1,0,66.4132
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,256,1,0,1.2195
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,512,1,0,2.4353
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,1536,1,0,10.1438
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,1024,1,0,5.7351
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,16384,1,0,93.4207
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,2048,1,0,14.8430
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,1,1,0,0.0930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,16,1,0,0.2158
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,4096,1,0,38.4816
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,3072,1,0,27.4553
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,32,1,0,0.3604
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,64,1,0,0.6278
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,128,1,0,1.1900
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,512,1,0,5.1329
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,6144,1,0,60.8828
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,256,1,0,2.2652
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,1536,1,0,20.6636
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,16,1,0,0.3568
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,2048,1,0,30.0543
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,8192,1,0,85.5594
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,1,1,0,0.0973
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,1024,1,0,12.1722
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,32,1,0,0.6299
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,64,1,0,1.1835
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,3072,1,0,55.2161
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,128,1,0,2.1797
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,256,1,0,4.7364
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,512,1,0,10.9373
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,4096,1,0,77.4933
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,1,1,0,0.1091
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,1024,1,0,24.6812
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,32,1,0,1.1758
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,64,1,0,2.1805
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,16,1,0,0.6273
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,1536,1,0,41.5577
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,128,1,0,4.5824
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,256,1,0,10.0885
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,2048,1,0,60.6960
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,16,1,0,1.1793
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,1,1,0,0.1420
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,512,1,0,22.1899
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,32,1,0,2.1882
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,128,1,0,9.7519
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,1024,1,0,49.5901
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,64,1,0,4.5884
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,256,1,0,20.5682
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1,1,0,0.1729
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,32,1,0,0.1505
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,64,1,0,0.1559
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,256,1,0,0.2466
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,16,1,0,0.1569
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,512,1,0,44.7788
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,128,1,0,0.1829
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1536,1,0,1.0280
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1024,1,0,0.6905
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,2048,1,0,1.3904
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,512,1,0,0.3653
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,3072,1,0,2.5577
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,10240,1,0,8.5260
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,6144,1,0,4.9791
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,8192,1,0,6.6283
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1,1,0,0.1958
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,32768,1,0,32.1511
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,12288,1,0,10.4342
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,16,1,0,0.1478
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,32,1,0,0.1565
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,16384,1,0,14.2466
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,64,1,0,0.1805
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,128,1,0,0.2424
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,256,1,0,0.3581
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1024,1,0,1.2934
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,512,1,0,0.6642
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1536,1,0,1.9629
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,4096,1,0,6.5343
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,2048,1,0,2.6622
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,3072,1,0,4.7914
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,8192,1,0,13.4236
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,6144,1,0,9.7870
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,10240,1,0,16.8277
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1,1,0,0.1823
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,12288,1,0,20.5027
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,16,1,0,0.1558
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,32,1,0,0.1829
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,16384,1,0,29.1295
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,64,1,0,0.2419
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,512,1,0,1.2430
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1024,1,0,2.4775
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,256,1,0,0.6520
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,32768,1,0,65.2500
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1536,1,0,3.8032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,2048,1,0,5.1735
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,128,1,0,0.3583
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,3072,1,0,9.4006
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,4096,1,0,12.7345
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,6144,1,0,19.3514
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,8192,1,0,26.7280
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1,1,0,0.1562
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,12288,1,0,42.0617
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,16,1,0,0.1831
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,32,1,0,0.2426
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,10240,1,0,34.3489
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,64,1,0,0.3618
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,16384,1,0,57.8368
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,128,1,0,0.6466
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,256,1,0,1.2101
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,512,1,0,2.3654
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1536,1,0,7.6010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,2048,1,0,10.6412
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1024,1,0,4.7935
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,3072,1,0,18.0193
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,32768,1,0,133.3219
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1,1,0,0.1604
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,6144,1,0,40.0911
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,16,1,0,0.2726
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,8192,1,0,54.6818
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,32,1,0,0.3549
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,64,1,0,0.6483
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,10240,1,0,69.4672
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,128,1,0,1.2102
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,256,1,0,2.3162
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,512,1,0,4.5623
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,12288,1,0,86.1747
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1024,1,0,9.7113
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1536,1,0,15.4358
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,2048,1,0,21.5367
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,16384,1,0,120.1892
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,1,1,0,0.1554
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,3072,1,0,37.2933
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,16,1,0,0.3613
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,4096,1,0,51.2644
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,32,1,0,0.6478
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,64,1,0,1.1995
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,128,1,0,2.3029
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,256,1,0,4.4813
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,6144,1,0,80.1228
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,512,1,0,9.3477
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,1024,1,0,19.8088
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,8192,1,0,109.9546
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,1536,1,0,31.5204
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,1,1,0,0.1692
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,16,1,0,0.6540
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,2048,1,0,44.5216
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,32,1,0,1.2032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,64,1,0,2.2952
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,256,1,0,9.0938
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,128,1,0,4.4281
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,3072,1,0,74.6723
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,512,1,0,19.2542
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,1,1,0,0.2077
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,4096,1,0,104.7185
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,16,1,0,1.2184
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,32,1,0,2.3050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,1024,1,0,40.1224
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,64,1,0,4.4543
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,128,1,0,9.0946
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,4096,1,0,25.3003
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,1536,1,0,63.7215
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,256,1,0,18.5953
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,1,1,0,0.2942
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,16,1,0,2.3364
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,32,1,0,4.4716
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,512,1,0,39.0549
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,2048,1,0,90.1787
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,64,1,0,9.0449
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,128,1,0,18.4915
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,16,1,0,0.1383
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,32,1,0,0.1384
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,1024,1,0,82.3314
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,256,1,0,38.8689
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,128,1,0,0.1528
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,64,1,0,0.1346
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,256,1,0,0.2152
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1,1,0,0.1415
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,512,1,0,0.3413
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1024,1,0,0.6630
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1536,1,0,1.1051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,512,1,0,79.5474
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,3072,1,0,3.1506
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,4096,1,0,4.3777
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,8192,1,0,8.8790
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,6144,1,0,6.5378
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,10240,1,0,11.4389
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,2048,1,0,1.6304
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1,1,0,0.1568
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,16,1,0,0.1278
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,12288,1,0,14.0732
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,64,1,0,0.1530
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,32,1,0,0.1342
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,32768,1,0,41.7900
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,256,1,0,0.3217
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,128,1,0,0.2037
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1024,1,0,1.3082
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1536,1,0,2.1783
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,16384,1,0,19.1516
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,3072,1,0,6.0363
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,2048,1,0,3.2128
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,4096,1,0,8.3940
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,512,1,0,0.6014
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,6144,1,0,12.9918
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1,1,0,0.1561
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,12288,1,0,27.8270
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,16,1,0,0.1327
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,10240,1,0,22.7792
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,16384,1,0,38.2449
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,32,1,0,0.1541
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,8192,1,0,17.8950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,64,1,0,0.2106
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,128,1,0,0.3035
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,512,1,0,1.1586
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,256,1,0,0.5493
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,2048,1,0,6.2616
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,32768,1,0,84.0011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1536,1,0,4.2593
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,3072,1,0,11.8444
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1024,1,0,2.5534
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,4096,1,0,16.5441
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1,1,0,0.1482
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,6144,1,0,25.7088
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,8192,1,0,35.6174
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,16,1,0,0.1549
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,32,1,0,0.2053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,12288,1,0,55.9788
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,128,1,0,0.5092
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,64,1,0,0.3044
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,10240,1,0,45.6559
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,256,1,0,1.0599
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,512,1,0,2.2471
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,16384,1,0,76.6661
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1024,1,0,4.9107
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1536,1,0,8.5435
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,3072,1,0,22.9561
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,2048,1,0,12.6610
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,4096,1,0,32.5432
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,6144,1,0,51.7652
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1,1,0,0.1412
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,32768,1,0,168.2346
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,16,1,0,0.2089
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,32,1,0,0.3004
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,8192,1,0,71.6718
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,10240,1,0,90.9644
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,64,1,0,0.5087
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,128,1,0,0.9849
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,256,1,0,2.0551
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,512,1,0,4.3417
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1024,1,0,9.9694
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,12288,1,0,111.7594
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1536,1,0,17.2736
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,2048,1,0,25.6545
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1,1,0,0.1307
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,16,1,0,0.3040
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,16384,1,0,153.5249
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,32,1,0,0.5100
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,3072,1,0,46.1968
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,64,1,0,0.9749
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,4096,1,0,65.3513
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,128,1,0,1.9235
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,256,1,0,3.9036
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,512,1,0,8.8280
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1024,1,0,20.2596
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,1,1,0,0.1398
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,6144,1,0,103.6389
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1536,1,0,34.6777
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,16,1,0,0.5108
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,8192,1,0,143.7272
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,32,1,0,0.9821
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,2048,1,0,51.7411
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,64,1,0,1.9140
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,128,1,0,3.6351
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,256,1,0,7.9522
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,512,1,0,17.9597
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,3072,1,0,92.7401
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,1,1,0,0.1651
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,16,1,0,0.9882
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,4096,1,0,130.5309
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,32,1,0,1.9020
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,1024,1,0,40.9279
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,64,1,0,3.6344
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,1536,1,0,69.8624
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,128,1,0,7.3861
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,256,1,0,16.3017
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,1,1,0,0.2305
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,16,1,0,1.9177
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,2048,1,0,103.4895
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,32,1,0,3.6093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,512,1,0,36.3886
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,64,1,0,7.3836
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,128,1,0,15.1177
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,16,1,0,0.1278
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,256,1,0,32.9408
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,1024,1,0,82.0367
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,32,1,0,0.1115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,128,1,0,0.1339
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,64,1,0,0.1153
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,256,1,0,0.1812
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,512,1,0,0.2808
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1024,1,0,0.5033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1,1,0,0.1153
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,512,1,0,73.0302
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1536,1,0,0.7747
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,2048,1,0,1.0914
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,3072,1,0,2.2009
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,6144,1,0,4.4913
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,8192,1,0,6.0161
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1,1,0,0.1311
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,10240,1,0,7.8208
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,12288,1,0,9.6293
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,16,1,0,0.1103
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,32,1,0,0.1155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,64,1,0,0.1337
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,16384,1,0,13.1540
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,128,1,0,0.1761
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,32768,1,0,29.4699
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1024,1,0,0.9173
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,512,1,0,0.4641
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,256,1,0,0.2672
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1536,1,0,1.4962
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,2048,1,0,2.1701
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,4096,1,0,5.8380
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,8192,1,0,12.2364
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,6144,1,0,8.9007
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,12288,1,0,18.9705
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,10240,1,0,15.4845
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,3072,1,0,4.2345
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1,1,0,0.1279
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,16384,1,0,26.0943
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,16,1,0,0.1151
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,32,1,0,0.1338
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,64,1,0,0.1758
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,256,1,0,0.4429
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,128,1,0,0.2580
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,512,1,0,0.8409
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1536,1,0,3.0054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,32768,1,0,59.0362
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1024,1,0,1.8287
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,2048,1,0,4.2903
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,3072,1,0,8.2729
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,4096,1,0,11.3531
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,8192,1,0,24.1865
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,12288,1,0,38.0736
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1,1,0,0.1278
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,16,1,0,0.1344
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,10240,1,0,31.0299
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,16384,1,0,52.7002
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,32,1,0,0.1771
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,64,1,0,0.2593
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,256,1,0,0.7964
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,512,1,0,1.6972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,128,1,0,0.4255
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1024,1,0,3.5877
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1536,1,0,6.0201
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,2048,1,0,8.6629
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,32768,1,0,118.0061
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,4096,1,0,22.1193
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,6144,1,0,35.1869
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1,1,0,0.1299
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,8192,1,0,48.6115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,10240,1,0,61.8460
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,16,1,0,0.1762
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,64,1,0,0.4231
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,12288,1,0,76.0480
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,32,1,0,0.2588
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,128,1,0,0.7587
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,256,1,0,1.5848
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1024,1,0,7.2861
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,16384,1,0,105.1501
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1536,1,0,12.2416
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,512,1,0,3.3000
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,2048,1,0,17.6460
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1,1,0,0.1103
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,3072,1,0,31.9392
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,16,1,0,0.2603
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,4096,1,0,44.4967
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,32,1,0,0.4226
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,64,1,0,0.7612
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,128,1,0,1.5158
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,6144,1,0,69.8647
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,256,1,0,3.0752
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,512,1,0,6.7165
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,8192,1,0,97.4632
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1536,1,0,24.6994
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1,1,0,0.1191
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,16,1,0,0.4254
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,2048,1,0,35.5961
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,32,1,0,0.7616
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,3072,1,0,63.2982
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,64,1,0,1.5208
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,128,1,0,2.9544
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,256,1,0,6.2659
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,4096,1,0,88.9169
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,512,1,0,13.8133
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,1,1,0,0.1383
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1024,1,0,29.9441
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,16,1,0,0.7593
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,32,1,0,1.5097
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,64,1,0,2.9349
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1536,1,0,49.3783
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,128,1,0,5.9777
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,256,1,0,12.8177
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,2048,1,0,71.4049
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,3072,1,0,15.8251
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,1,1,0,0.1885
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,16,1,0,1.5254
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,512,1,0,27.8467
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,64,1,0,6.0102
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,32,1,0,2.9301
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,128,1,0,12.2416
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,1024,1,0,60.4077
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,16,1,0,0.1194
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1,1,0,0.1140
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,64,1,0,0.1072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,256,1,0,25.9322
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,128,1,0,0.1180
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,512,1,0,55.5002
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,32,1,0,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,512,1,0,0.2436
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,256,1,0,0.1625
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1024,1,0,0.4238
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1536,1,0,0.6216
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,2048,1,0,0.8444
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,3072,1,0,1.7275
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,4096,1,0,2.3788
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,8192,1,0,4.5806
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,10240,1,0,5.9981
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,12288,1,0,7.4663
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,6144,1,0,3.3442
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1,1,0,0.1158
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,32,1,0,0.1050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,32768,1,0,23.2435
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,16,1,0,0.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,128,1,0,0.1552
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,64,1,0,0.1188
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1024,1,0,14.8655
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,512,1,0,0.4049
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,256,1,0,0.2393
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1024,1,0,0.7507
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1536,1,0,1.1660
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,2048,1,0,1.6083
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,3072,1,0,3.2288
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,4096,1,0,4.5435
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,6144,1,0,6.8430
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,8192,1,0,9.3628
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,16384,1,0,20.2345
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1,1,0,0.1197
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,10240,1,0,11.7886
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,32,1,0,0.1193
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,16,1,0,0.1075
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,32768,1,0,46.7475
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,64,1,0,0.1560
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,128,1,0,0.2321
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,256,1,0,0.3896
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,512,1,0,0.7119
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1024,1,0,1.4280
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,2048,1,0,3.2500
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1536,1,0,2.2697
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,4096,1,0,8.7628
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,8192,1,0,18.3781
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,3072,1,0,6.5216
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,10240,1,0,23.8492
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,12288,1,0,29.3447
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1,1,0,0.1135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,16384,1,0,40.5619
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,16,1,0,0.1187
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,32,1,0,0.1636
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,128,1,0,0.3797
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,256,1,0,0.6887
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,64,1,0,0.2321
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,512,1,0,1.3567
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1024,1,0,2.8795
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,32768,1,0,93.3510
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1536,1,0,4.8010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,4096,1,0,16.9873
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,6144,1,0,26.7359
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,8192,1,0,36.9587
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,10240,1,0,47.2950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1,1,0,0.1173
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,12288,1,0,58.5211
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,16,1,0,0.1531
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,128,1,0,0.6734
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,32,1,0,0.2330
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,256,1,0,1.3047
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,16384,1,0,81.0698
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1024,1,0,5.9575
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,512,1,0,2.7206
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1536,1,0,9.7208
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,2048,1,0,13.6378
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,3072,1,0,24.6878
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1,1,0,0.1074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,4096,1,0,34.2255
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,16,1,0,0.2350
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,32,1,0,0.3787
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,6144,1,0,53.5035
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,64,1,0,0.6705
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,128,1,0,1.2569
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,256,1,0,2.6182
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,512,1,0,5.6892
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1024,1,0,12.2808
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1536,1,0,19.5440
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,2048,1,0,27.5191
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1,1,0,0.1074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,2048,1,0,6.6819
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,3072,1,0,49.2778
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,16,1,0,0.3797
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,32,1,0,0.6702
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,4096,1,0,68.4435
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,128,1,0,2.5358
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,256,1,0,5.4345
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,512,1,0,11.6693
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1024,1,0,24.6077
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,1,1,0,0.1236
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,16,1,0,0.6704
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1536,1,0,39.4443
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,32,1,0,1.2633
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,64,1,0,2.5416
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,64,1,0,0.3787
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,128,1,0,5.3254
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,2048,1,0,55.2795
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,256,1,0,11.1817
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,512,1,0,23.4010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,16,1,0,1.2689
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,1,1,0,0.1628
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,32,1,0,2.5338
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,64,1,0,5.3253
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,1024,1,0,49.3144
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,32,1,0,0.1013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,16,1,0,0.1151
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,256,1,0,22.5138
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1,1,0,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,64,1,0,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,512,1,0,47.2180
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,128,1,0,0.1140
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,256,1,0,0.1503
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,8192,1,0,74.2227
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1024,1,0,0.4046
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,512,1,0,0.2340
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1536,1,0,0.5991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,2048,1,0,0.8154
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,3072,1,0,1.6824
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,4096,1,0,2.2547
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,10240,1,0,5.5771
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,12288,1,0,7.1129
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,8192,1,0,4.2259
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,16384,1,0,9.8791
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,16,1,0,0.1063
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1,1,0,0.1119
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,64,1,0,1.2593
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,32,1,0,0.1048
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,64,1,0,0.1151
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,128,1,0,0.1463
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,256,1,0,0.2275
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1536,1,0,1.1044
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,512,1,0,0.3815
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1024,1,0,0.7207
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,2048,1,0,1.5438
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,4096,1,0,4.1443
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,3072,1,0,3.0674
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,6144,1,0,6.4816
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,8192,1,0,9.0471
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,10240,1,0,11.4703
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,12288,1,0,14.1356
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1,1,0,0.1179
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,16384,1,0,19.6444
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,16,1,0,0.1045
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,32,1,0,0.1145
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,64,1,0,0.1464
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,32768,1,0,45.8168
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,128,1,0,0.2236
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1024,1,0,1.3560
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,256,1,0,0.3751
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1536,1,0,2.1037
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,3072,1,0,6.1488
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,2048,1,0,2.9131
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,4096,1,0,8.4685
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,6144,1,0,12.9304
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,8192,1,0,17.9678
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,10240,1,0,23.1342
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,12288,1,0,28.5944
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,16,1,0,0.1156
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,16384,1,0,39.5833
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1,1,0,0.1136
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,64,1,0,0.2252
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,128,1,0,0.3615
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,256,1,0,0.6547
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1024,1,0,2.5583
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,32768,1,0,92.1047
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1536,1,0,4.4526
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,2048,1,0,6.3626
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,3072,1,0,11.8644
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,4096,1,0,16.5215
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,6144,1,0,25.9880
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,8192,1,0,36.2086
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1,1,0,0.1156
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,10240,1,0,45.8251
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,16,1,0,0.1468
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,12288,1,0,56.7160
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,64,1,0,0.3617
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,32,1,0,0.2222
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,128,1,0,0.6387
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,16384,1,0,79.1915
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,256,1,0,1.2376
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,512,1,0,2.3762
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1024,1,0,5.6145
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1536,1,0,9.3196
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,2048,1,0,13.1824
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,3072,1,0,23.8733
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,512,1,0,0.6757
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,4096,1,0,33.0722
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1,1,0,0.1026
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,32,1,0,0.3630
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,16,1,0,0.2246
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,6144,1,0,51.8657
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,64,1,0,0.6384
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,128,1,0,1.2006
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,256,1,0,2.2937
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,512,1,0,5.2923
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,8192,1,0,72.3736
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1024,1,0,11.6785
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1536,1,0,18.7982
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,1,1,0,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,2048,1,0,26.5962
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,16,1,0,0.3625
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,32,1,0,0.1478
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,32,1,0,0.6378
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,64,1,0,1.2021
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,128,1,0,2.2220
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,256,1,0,5.0907
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,4096,1,0,66.1741
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,512,1,0,11.0180
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,1,1,0,0.1177
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,512,1,0,1.2793
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,16,1,0,0.6359
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,32,1,0,1.2011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,1024,1,0,23.7010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,64,1,0,2.2208
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,2048,1,0,53.1673
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,128,1,0,4.9549
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,256,1,0,10.6398
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,1,1,0,0.1525
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,16,1,0,1.2008
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,512,1,0,22.2585
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,32,1,0,2.2214
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,64,1,0,4.9529
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,1024,1,0,47.4637
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,128,1,0,10.3933
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1,16,1,0,0.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,256,1,0,21.5953
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,512,1,0,44.5101
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1,64,1,0,0.1013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1,32,1,0,0.0973
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1,128,1,0,0.1135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1,1,1,0,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1,256,1,0,0.1484
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1,512,1,0,0.2311
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1,2048,1,0,0.7980
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1,1024,1,0,0.4009
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1,3072,1,0,1.6471
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1,4096,1,0,2.2511
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1,6144,1,0,3.1529
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1,8192,1,0,4.1676
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1,10240,1,0,5.4703
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1,12288,1,0,6.8508
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1,16384,1,0,9.5961
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,2,1,1,0,0.1119
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1,32768,1,0,22.3852
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,2,32,1,0,0.1015
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,2,64,1,0,0.1132
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,2,128,1,0,0.1464
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,2,256,1,0,0.2229
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,2,512,1,0,0.3789
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,2,1024,1,0,0.7041
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,2,1536,1,0,1.0820
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,2,2048,1,0,1.5165
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,2,3072,1,0,3.0401
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,2,4096,1,0,4.0876
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,2,6144,1,0,6.1996
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,2,8192,1,0,8.7541
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,2,10240,1,0,11.1226
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,2,12288,1,0,13.7042
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,4,1,1,0,0.1093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,4,16,1,0,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,4,32,1,0,0.1142
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,2,32768,1,0,45.2306
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,4,64,1,0,0.1472
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,4,128,1,0,0.2198
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,4,256,1,0,0.3654
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,4,512,1,0,0.6650
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,4,1536,1,0,2.0522
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,4,1024,1,0,1.3338
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,4,2048,1,0,2.8543
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,4,3072,1,0,5.8990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,4,4096,1,0,8.1884
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,4,6144,1,0,12.4056
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,4,8192,1,0,17.4712
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,4,10240,1,0,22.5870
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,4,12288,1,0,27.9548
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,8,1,1,0,0.1075
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,8,16,1,0,0.1135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,4,16384,1,0,38.9865
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,8,64,1,0,0.2193
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,8,128,1,0,0.3584
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,8,256,1,0,0.6505
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,8,512,1,0,1.2512
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,8,1024,1,0,2.4976
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,4,32768,1,0,91.0838
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,8,1536,1,0,4.1761
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,8,2048,1,0,6.0716
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,2,16,1,0,0.0994
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,8,3072,1,0,11.4481
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,8,4096,1,0,15.9375
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,8,6144,1,0,25.4418
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,16,1,1,0,0.1117
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,8,8192,1,0,35.3796
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,16,16,1,0,0.1455
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,8,10240,1,0,45.0973
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,16,32,1,0,0.2160
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,16,64,1,0,0.3545
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,16,128,1,0,0.6218
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,16,256,1,0,1.2148
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,8,16384,1,0,78.3774
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,8,12288,1,0,55.9757
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,16,512,1,0,2.3292
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,16,1024,1,0,5.3411
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,2,16384,1,0,19.2748
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,16,1536,1,0,8.8272
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,16,2048,1,0,12.6577
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,32,16,1,0,0.2193
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,32,1,1,0,0.0996
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,16,4096,1,0,32.4926
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,32,32,1,0,0.3573
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,16,6144,1,0,51.1640
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,16,3072,1,0,23.2752
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,32,64,1,0,0.6220
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,32,128,1,0,1.1771
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,32,256,1,0,2.2406
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,16,8192,1,0,71.1469
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,32,512,1,0,5.0063
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,32,1024,1,0,11.2440
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,32,1536,1,0,18.1805
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,64,1,1,0,0.1040
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,32,2048,1,0,25.8354
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,64,32,1,0,0.6259
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,64,16,1,0,0.3575
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,64,64,1,0,1.1756
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,8,32,1,0,0.1445
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,32,3072,1,0,46.9938
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,64,128,1,0,2.1654
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,64,256,1,0,4.8285
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,64,512,1,0,10.5816
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,32,4096,1,0,65.4578
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,64,1024,1,0,22.9199
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,128,32,1,0,1.1810
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,128,1,1,0,0.1170
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,128,64,1,0,2.1696
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,64,1536,1,0,36.8890
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,128,16,1,0,0.6255
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,128,128,1,0,4.7180
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,128,256,1,0,10.1612
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,256,16,1,0,1.1754
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,64,2048,1,0,52.3619
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,128,512,1,0,21.6007
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,256,32,1,0,2.1770
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,256,64,1,0,4.7046
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,256,1,1,0,0.1476
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,256,128,1,0,9.8992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,128,1024,1,0,46.3458
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,256,256,1,0,20.8512
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1,16,1,0,0.1071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1,32,1,0,0.0974
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1,64,1,0,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1,1,1,0,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1,128,1,0,0.1212
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1,512,1,0,0.2281
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1,256,1,0,0.1445
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1,1536,1,0,0.5773
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,256,512,1,0,43.8512
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1,2048,1,0,0.7844
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1,1024,1,0,0.3899
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1,3072,1,0,1.6526
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1,6144,1,0,3.1237
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1,8192,1,0,4.1267
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1,4096,1,0,2.1902
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1,16384,1,0,9.3345
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,2,1,1,0,0.1098
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1,32768,1,0,21.7859
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1,10240,1,0,5.3721
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,2,16,1,0,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,2,64,1,0,0.1117
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,2,128,1,0,0.1418
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,2,32,1,0,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,2,512,1,0,0.3671
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,2,1024,1,0,0.6886
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,2,1536,1,0,1.0555
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,2,3072,1,0,2.9504
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,2,256,1,0,0.2166
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,2,4096,1,0,4.0400
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,2,8192,1,0,8.5537
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,2,12288,1,0,13.4695
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,2,2048,1,0,1.4766
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,2,10240,1,0,10.9179
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,2,16384,1,0,18.7866
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,4,16,1,0,0.0994
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,4,32,1,0,0.1113
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,4,1,1,0,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,2,32768,1,0,43.9283
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,4,128,1,0,0.2117
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,4,64,1,0,0.1443
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,4,256,1,0,0.3602
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,4,512,1,0,0.6467
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,4,1024,1,0,1.2940
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,4,1536,1,0,2.0040
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,4,4096,1,0,7.9462
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,4,6144,1,0,12.1982
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,4,3072,1,0,5.7267
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,4,8192,1,0,16.9690
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,8,1,1,0,0.1096
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,4,12288,1,0,27.0947
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,8,16,1,0,0.1112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,4,16384,1,0,37.9677
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,8,32,1,0,0.1424
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,4,32768,1,0,88.3444
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,8,256,1,0,0.6250
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,8,64,1,0,0.2100
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,8,128,1,0,0.3495
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,8,1536,1,0,4.0162
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,8,2048,1,0,5.8776
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,8,1024,1,0,2.4208
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,8,3072,1,0,11.1118
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,8,4096,1,0,15.5185
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,8,6144,1,0,24.6997
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,8,8192,1,0,34.3662
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,8,10240,1,0,43.7147
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,16,1,1,0,0.1093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,16,16,1,0,0.1421
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,16,32,1,0,0.2118
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,16,64,1,0,0.3470
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,16,128,1,0,0.6071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,8,16384,1,0,75.9456
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,16,512,1,0,2.2618
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,16,1024,1,0,5.1024
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,16,1536,1,0,8.5564
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,16,2048,1,0,12.2302
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,32,1,1,0,0.0972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,16,4096,1,0,31.4379
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,32,16,1,0,0.2123
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,32,32,1,0,0.3492
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,16,8192,1,0,68.8786
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,32,64,1,0,0.6069
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,32,128,1,0,1.1372
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,32,256,1,0,2.1663
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,32,512,1,0,4.8188
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,32,1024,1,0,10.7399
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,8,512,1,0,1.2132
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,32,1536,1,0,17.3706
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,32,2048,1,0,24.8756
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,64,1,1,0,0.0998
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,32,3072,1,0,45.0344
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,64,16,1,0,0.3488
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,64,32,1,0,0.6054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,64,64,1,0,1.1414
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,64,128,1,0,2.0957
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,32,4096,1,0,63.3371
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,64,256,1,0,4.6014
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,8,12288,1,0,54.2996
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,64,512,1,0,10.1245
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,64,1536,1,0,35.2860
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,128,16,1,0,0.6088
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,128,1,1,0,0.1093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,64,2048,1,0,49.7956
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,128,128,1,0,4.4800
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,128,64,1,0,2.1091
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,16,3072,1,0,22.5983
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,128,256,1,0,9.7229
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,128,512,1,0,20.4785
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,256,1,1,0,0.1420
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,256,32,1,0,2.1066
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,256,16,1,0,1.1415
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,128,1024,1,0,44.2426
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,256,64,1,0,4.4732
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,16,6144,1,0,49.1728
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,256,128,1,0,9.4924
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,256,256,1,0,19.7263
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1,16,1,0,0.1057
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1,64,1,0,0.0982
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1,32,1,0,0.0948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,256,512,1,0,41.6166
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1,128,1,0,0.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1,256,1,0,0.1423
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1,512,1,0,0.2197
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1,1536,1,0,0.5701
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1,1024,1,0,0.3862
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1,1,1,0,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1,2048,1,0,0.7726
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1,3072,1,0,1.6089
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1,4096,1,0,2.2055
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1,6144,1,0,3.0638
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1,10240,1,0,5.3417
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1,16384,1,0,9.2259
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,2,1,1,0,0.1095
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1,12288,1,0,6.6456
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1,32768,1,0,21.7382
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,2,16,1,0,0.0960
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,2,32,1,0,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,2,64,1,0,0.1115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,2,256,1,0,0.2178
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,2,128,1,0,0.1381
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,2,512,1,0,0.3653
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,2,1024,1,0,0.6790
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,2,1536,1,0,1.0443
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,2,3072,1,0,2.9325
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,128,32,1,0,1.1313
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,2,4096,1,0,4.0083
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,2,8192,1,0,8.3670
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,2,6144,1,0,6.0333
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,2,10240,1,0,10.8254
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,2,12288,1,0,13.3856
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,4,1,1,0,0.1035
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,4,16,1,0,0.0988
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,2,16384,1,0,18.6590
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,4,32,1,0,0.1120
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,4,64,1,0,0.1380
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,2,32768,1,0,43.6661
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,4,256,1,0,0.3536
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,4,128,1,0,0.2086
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,4,512,1,0,0.6339
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,4,1024,1,0,1.2835
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,4,1536,1,0,1.9874
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,4,3072,1,0,5.7054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,4,6144,1,0,12.0943
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,4,4096,1,0,7.7956
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,4,10240,1,0,21.7780
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,4,8192,1,0,16.8059
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,4,12288,1,0,26.9727
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,8,1,1,0,0.1047
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,8,16,1,0,0.1115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,4,16384,1,0,37.4137
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,8,32,1,0,0.1373
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,8,64,1,0,0.2058
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,8,256,1,0,0.6151
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,8,128,1,0,0.3448
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,8,512,1,0,1.2027
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,4,32768,1,0,87.1647
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,8,2048,1,0,5.6817
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,8,1536,1,0,3.9724
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,8,4096,1,0,15.4109
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,8,3072,1,0,11.0409
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,8,6144,1,0,24.3472
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,8,8192,1,0,33.8170
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,2,2048,1,0,1.4612
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,16,1,1,0,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,8,10240,1,0,43.2444
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,8,12288,1,0,53.6788
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,16,16,1,0,0.1404
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,16,64,1,0,0.3458
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,16,32,1,0,0.2074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,16,128,1,0,0.6002
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,8,16384,1,0,74.8437
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,16,256,1,0,1.1635
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,16,1536,1,0,8.4363
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,16,1024,1,0,4.9642
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,16,2048,1,0,12.0925
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,16,4096,1,0,30.9390
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,16,3072,1,0,22.2282
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,32,1,1,0,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,32,16,1,0,0.2095
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,32,64,1,0,0.5983
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,16,6144,1,0,48.6365
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,32,128,1,0,1.1213
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,32,256,1,0,2.1486
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,32,512,1,0,4.6353
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,16,8192,1,0,67.8471
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,32,1024,1,0,10.6222
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,32,1536,1,0,17.1279
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,64,1,1,0,0.0979
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,32,2048,1,0,24.3704
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,64,16,1,0,0.3452
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,64,32,1,0,0.5995
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,32,3072,1,0,44.0965
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,64,64,1,0,1.1285
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,8,1024,1,0,2.3791
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,32,4096,1,0,61.8751
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,64,128,1,0,2.0605
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,64,256,1,0,4.4862
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,64,512,1,0,9.9071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,64,1024,1,0,21.4891
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,128,1,1,0,0.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,128,16,1,0,0.6011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,64,1536,1,0,34.2582
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,128,64,1,0,2.0713
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,64,2048,1,0,48.6062
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,128,128,1,0,4.3296
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,128,256,1,0,9.6145
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,128,512,1,0,19.9719
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,256,32,1,0,2.0590
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,256,1,1,0,0.1396
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,128,1024,1,0,43.0998
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,256,64,1,0,4.3148
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,256,128,1,0,9.3133
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,256,256,1,0,19.3158
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,16,1,0,0.1589
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,32,1,0,0.1507
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,64,1,0,0.1568
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,128,1,0,0.1834
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,32,32,1,0,0.3456
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,256,1,0,0.2497
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,512,1,0,0.3743
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,0,0.1848
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1024,1,0,0.7275
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,3072,1,0,2.9584
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,2048,1,0,1.5864
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1536,1,0,1.1252
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,4096,1,0,4.0444
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,6144,1,0,6.1592
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,8192,1,0,8.3583
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,12288,1,0,13.6162
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,10240,1,0,10.8542
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,16384,1,0,19.0922
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,0,0.1872
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,32,1,0,0.1563
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,16,1,0,0.1502
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,64,1,0,0.1825
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,32768,1,0,40.2087
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,128,1,0,0.2462
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,256,1,0,0.3583
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,512,1,0,0.6673
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,2048,1,0,3.0553
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1024,1,0,1.3532
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,4096,1,0,7.9879
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,3072,1,0,5.5744
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,6144,1,0,12.3884
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,8192,1,0,17.2218
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,10240,1,0,21.7462
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,0,0.1945
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,12288,1,0,27.3166
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,16,1,0,0.1575
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,32,1,0,0.1824
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,128,1,0,0.3518
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,64,1,0,0.2461
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,256,512,1,0,40.1983
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,256,1,0,0.6415
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,512,1,0,1.2439
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,32768,1,0,82.9849
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1024,1,0,2.5914
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1536,1,0,4.2429
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,3072,1,0,11.3657
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,2048,1,0,6.1900
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,4096,1,0,15.7262
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,6144,1,0,25.0523
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,8192,1,0,34.6234
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1,1,0,0.1625
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,10240,1,0,44.4834
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,16,1,0,0.1831
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,12288,1,0,55.4220
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,64,1,0,0.3482
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,32,1,0,0.2446
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,128,1,0,0.6274
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,16384,1,0,75.7275
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,256,1,0,1.1901
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,512,1,0,2.3758
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1024,1,0,5.0089
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1536,1,0,8.7082
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,2048,1,0,12.4659
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,3072,1,0,22.4449
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1536,1,0,2.1525
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,4096,1,0,31.6336
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,32768,1,0,166.3103
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,6144,1,0,50.4697
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1,1,0,0.1643
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,16,1,0,0.2473
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,8192,1,0,70.4596
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,32,1,0,0.3494
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,64,1,0,0.6293
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,128,1,0,1.1620
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,10240,1,0,90.4607
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,16384,1,0,37.0791
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,256,1,0,2.2578
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,512,1,0,4.5724
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1024,1,0,10.2788
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,12288,1,0,111.8695
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1536,1,0,17.2183
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,2048,1,0,25.8728
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,16,1,0,0.3529
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1,1,0,0.1576
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,3072,1,0,45.6752
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,32,1,0,0.6279
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,16384,1,0,153.1890
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,64,1,0,1.1625
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,4096,1,0,65.2682
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,128,1,0,2.2123
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,512,1,0,9.4400
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,256,1,0,4.4097
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,6144,1,0,99.7760
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1024,1,0,21.0225
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1,1,0,0.1694
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,16,1,0,0.6360
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1536,1,0,35.8370
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,32,1,0,1.1651
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,8192,1,0,139.3197
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,2048,1,0,53.3547
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,64,1,0,2.2256
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,256,1,0,8.8762
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,128,1,0,4.2720
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,512,1,0,19.0004
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,3072,1,0,91.7459
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,1,1,0,0.2091
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,16,1,0,1.1754
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1024,1,0,44.3865
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,32,1,0,2.2118
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,64,1,0,4.2861
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,4096,1,0,129.9328
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,128,1,0,8.6950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,256,1,0,18.3267
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1536,1,0,73.1314
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,1,1,0,0.2984
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,16,1,0,2.2492
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,32,1,0,4.2664
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,512,1,0,40.0255
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,2048,1,0,106.3998
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,64,1,0,8.7226
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,128,1,0,17.9080
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,16,1,0,0.1368
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,32,1,0,0.1268
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,1024,1,0,89.7743
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,256,1,0,37.7682
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,64,1,0,0.1343
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,128,1,0,0.1580
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,0,0.1424
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,256,1,0,0.2317
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1024,1,0,0.8734
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1536,1,0,1.5554
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,512,1,0,80.0166
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,512,1,0,0.3980
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,3072,1,0,4.6808
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,2048,1,0,2.4154
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,4096,1,0,6.6574
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,6144,1,0,10.3093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,10240,1,0,18.1685
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,0,0.1512
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,16,1,0,0.1254
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,12288,1,0,22.3853
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,8192,1,0,14.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,16384,1,0,30.5314
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,64,1,0,0.1581
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,32,1,0,0.1356
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,512,1,0,0.6879
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,256,1,0,0.3504
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,128,1,0,0.2163
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1536,1,0,3.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1024,1,0,1.7180
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,32768,1,0,64.8228
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,3072,1,0,9.1792
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,2048,1,0,4.8701
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,6144,1,0,20.7140
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,4096,1,0,13.0478
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,8192,1,0,28.6711
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,0,0.1542
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,16,1,0,0.1350
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,12288,1,0,44.4807
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,64,1,0,0.2155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,32,1,0,0.1584
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,10240,1,0,36.7681
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,128,1,0,0.3260
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,256,1,0,0.5956
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,16384,1,0,61.2681
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,512,1,0,1.3653
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1024,1,0,3.3924
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1536,1,0,6.1579
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,3072,1,0,18.3168
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,2048,1,0,9.6127
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,32768,1,0,131.2179
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,6144,1,0,41.6463
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,4096,1,0,26.1443
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,0,0.1445
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,16,1,0,0.1609
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,8192,1,0,57.6129
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,10240,1,0,74.4733
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,32,1,0,0.2158
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,64,1,0,0.3266
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,128,1,0,0.5485
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,256,1,0,1.1605
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,12288,1,0,90.3665
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1024,1,0,6.6904
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1536,1,0,12.4651
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,512,1,0,2.7224
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,2048,1,0,19.5349
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,16384,1,0,124.3154
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,3072,1,0,36.0168
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,4096,1,0,51.9827
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1,1,0,0.1394
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,16,1,0,0.2173
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,6144,1,0,84.1737
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,32,1,0,0.3262
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,64,1,0,0.5485
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,8192,1,0,116.3786
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,128,1,0,1.0559
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,256,1,0,2.2494
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,32768,1,0,264.9090
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,512,1,0,5.2324
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,10240,1,0,148.9128
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1024,1,0,13.5192
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1536,1,0,25.3420
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,12288,1,0,182.0222
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,2048,1,0,39.4819
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1,1,0,0.1284
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,16,1,0,0.3289
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,32,1,0,0.5484
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,3072,1,0,72.6355
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,64,1,0,1.0557
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,128,1,0,2.0606
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,16384,1,0,249.6159
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,256,1,0,4.3562
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,4096,1,0,104.5112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,512,1,0,10.7731
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1024,1,0,27.4937
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1,1,0,0.1407
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1536,1,0,50.7954
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,16,1,0,0.5516
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,32,1,0,1.0575
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,6144,1,0,168.6642
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,64,1,0,2.0636
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,2048,1,0,79.4555
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,128,1,0,3.9630
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,256,1,0,8.8403
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,512,1,0,22.1146
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,8192,1,0,234.1685
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,1,1,0,0.1702
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,16,1,0,1.0716
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,3072,1,0,145.2521
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1024,1,0,55.2858
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,32,1,0,2.0706
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,64,1,0,3.9558
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,128,1,0,8.0353
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,256,1,0,17.9080
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,1,1,0,0.2429
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1536,1,0,102.1146
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,16,1,0,2.0764
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,4096,1,0,209.7104
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,512,1,0,45.0403
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,32,1,0,3.9577
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,64,1,0,8.0381
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,128,1,0,16.3684
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,2048,1,0,159.8450
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,16,1,0,0.1285
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,256,1,0,36.0964
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,64,1,0,0.1132
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,1024,1,0,110.9186
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,128,1,0,0.1319
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,0,0.1137
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,512,1,0,0.3098
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,256,1,0,0.1894
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1024,1,0,0.6107
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,512,1,0,89.3174
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1536,1,0,0.9970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,2048,1,0,1.4811
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,3072,1,0,2.9779
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,6144,1,0,6.3668
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,4096,1,0,4.2164
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,10240,1,0,11.1116
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,12288,1,0,13.6380
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,0,0.1255
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,8192,1,0,8.6439
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,16384,1,0,18.7058
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,16,1,0,0.1109
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,64,1,0,0.1334
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,32,1,0,0.1160
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,256,1,0,0.2861
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,512,1,0,0.5181
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,32768,1,0,41.1345
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,128,1,0,0.1811
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1024,1,0,1.1224
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1536,1,0,1.9402
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,4096,1,0,8.1182
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,2048,1,0,2.9968
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,3072,1,0,5.7808
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,8192,1,0,17.4667
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,6144,1,0,12.7224
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,0,0.1308
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,12288,1,0,27.2588
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,10240,1,0,22.1764
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,16,1,0,0.1136
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,32,1,0,0.1329
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,16384,1,0,37.4115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,64,1,0,0.1809
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,128,1,0,0.2713
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,512,1,0,0.9388
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,256,1,0,0.4708
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1024,1,0,2.2616
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1536,1,0,3.9242
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,2048,1,0,5.9261
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,32768,1,0,82.3873
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,3072,1,0,11.4494
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,4096,1,0,16.0010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,6144,1,0,25.1005
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,8192,1,0,35.0848
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,0,0.1258
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,32,1,0,0.1804
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,10240,1,0,44.9889
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,12288,1,0,55.1775
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,64,1,0,0.2711
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,128,1,0,0.4468
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,256,1,0,0.8412
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,512,1,0,1.9029
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,16384,1,0,75.7187
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1024,1,0,4.4667
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1536,1,0,7.9305
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,2048,1,0,12.0203
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,3072,1,0,22.0898
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,4096,1,0,31.7138
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,0,0.1265
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,6144,1,0,51.1262
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,32768,1,0,165.7999
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,8192,1,0,70.6428
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,64,1,0,0.4458
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,32,1,0,0.2711
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,10240,1,0,90.4712
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,16,1,0,0.1807
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,128,1,0,0.7958
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,256,1,0,1.6969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,12288,1,0,110.9385
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,512,1,0,3.7478
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1024,1,0,9.0343
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1536,1,0,16.2139
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,2048,1,0,24.4753
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,16384,1,0,152.1756
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1,1,0,0.1107
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,3072,1,0,44.9693
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,16,1,0,0.2732
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,32,1,0,0.4463
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,64,1,0,0.7985
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,4096,1,0,64.1260
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,128,1,0,1.6025
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,256,1,0,3.2937
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,512,1,0,7.6475
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,6144,1,0,102.6193
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1024,1,0,18.4208
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1536,1,0,32.7909
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1,1,0,0.1184
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,16,1,0,0.4466
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,16,1,0,0.1355
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,2048,1,0,49.3566
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,8192,1,0,142.0736
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,32,1,0,0.7993
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,64,1,0,1.5941
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,128,1,0,3.0996
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,3072,1,0,89.7308
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,512,1,0,15.7543
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1,1,0,0.1382
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,16,1,0,0.7992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1024,1,0,37.1426
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,32,1,0,1.6011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,4096,1,0,128.3008
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,64,1,0,3.0978
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,256,1,0,6.6785
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,128,1,0,6.2987
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1536,1,0,65.7251
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,256,1,0,13.6364
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,1,1,0,0.1933
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,2048,1,0,99.1647
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,16,1,0,1.6047
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,32,1,0,3.1047
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,512,1,0,31.7467
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,64,1,0,6.2915
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,128,1,0,12.9640
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,0,0.1072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1024,1,0,74.3492
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,16,1,0,0.1178
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,256,1,0,27.6699
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,32,1,0,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,64,1,0,0.1038
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,128,1,0,0.1172
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,256,1,0,0.1610
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,512,1,0,0.2567
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1024,1,0,0.4814
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1536,1,0,0.7377
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,512,1,0,64.1399
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,2048,1,0,1.0398
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,3072,1,0,2.1240
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,6144,1,0,4.2232
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,4096,1,0,2.9072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,8192,1,0,5.8528
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,10240,1,0,7.6463
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,0,0.1154
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,16,1,0,0.1042
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,16384,1,0,13.0118
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,12288,1,0,9.4306
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,64,1,0,0.1177
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,32,1,0,0.1075
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,512,1,0,0.4313
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,128,1,0,0.1562
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,32768,1,0,28.9147
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,256,1,0,0.2467
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1024,1,0,0.8614
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,2048,1,0,2.0092
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,3072,1,0,3.9774
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,4096,1,0,5.6061
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1536,1,0,1.3781
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,8192,1,0,11.9188
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,6144,1,0,8.6894
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,10240,1,0,15.2029
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,0,0.1184
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,16,1,0,0.1037
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,16384,1,0,25.9167
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,12288,1,0,18.6885
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,32,1,0,0.1176
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,128,1,0,0.2384
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,64,1,0,0.1568
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,256,1,0,0.4094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,512,1,0,0.7642
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,32768,1,0,58.3704
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1536,1,0,2.7183
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,2048,1,0,4.0248
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,3072,1,0,8.0579
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,6144,1,0,16.9630
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1024,1,0,1.6351
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,8192,1,0,23.7965
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,4096,1,0,11.0941
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,12288,1,0,37.6657
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,0,0.1133
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,10240,1,0,30.3625
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,32,1,0,0.1566
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,16,1,0,0.1156
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,64,1,0,0.2381
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,128,1,0,0.3931
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,256,1,0,0.7105
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,16384,1,0,51.8166
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,512,1,0,1.4430
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1024,1,0,3.3201
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,2048,1,0,8.3096
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1536,1,0,5.7048
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,4096,1,0,21.6020
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,32768,1,0,116.9475
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,8192,1,0,48.0722
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,6144,1,0,34.5180
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,0,0.1152
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,3072,1,0,15.4377
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,10240,1,0,61.3685
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,16,1,0,0.1549
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,128,1,0,0.6853
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,32,1,0,0.2382
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,256,1,0,1.3606
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,12288,1,0,75.3691
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,64,1,0,0.3930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,512,1,0,2.9415
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1024,1,0,6.8178
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,16384,1,0,104.1610
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,2048,1,0,17.0352
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1536,1,0,11.5626
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,3072,1,0,31.2864
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1,1,0,0.1037
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,16,1,0,0.2373
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,4096,1,0,43.7534
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,32,1,0,0.3927
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,64,1,0,0.6853
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,128,1,0,1.3121
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,256,1,0,2.7160
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,512,1,0,6.0782
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,6144,1,0,68.9334
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1024,1,0,13.9538
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1536,1,0,23.4579
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,8192,1,0,96.7151
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1,1,0,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,16,1,0,0.3932
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,32,1,0,0.6900
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,2048,1,0,34.3469
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,64,1,0,1.2969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,128,1,0,2.6382
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,3072,1,0,62.3858
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,256,1,0,5.6661
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,512,1,0,12.5179
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1024,1,0,28.1195
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,16,1,0,0.6935
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1,1,0,0.1173
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,4096,1,0,87.9273
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,32,1,0,1.3030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,64,1,0,2.6232
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1536,1,0,47.2360
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,128,1,0,5.4566
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,256,1,0,11.6201
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,1,1,0,0.1604
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,16,1,0,1.3017
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,64,1,0,5.4568
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,32,1,0,2.6372
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,2048,1,0,68.9655
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,128,1,0,11.2467
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1024,1,0,56.5449
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,512,1,0,25.5754
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,16,1,0,0.1153
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,32,1,0,0.0995
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,256,1,0,23.4778
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,0,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,64,1,0,0.1030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,512,1,0,51.1434
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,128,1,0,0.1120
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,256,1,0,0.1504
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,512,1,0,0.2487
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,2048,1,0,0.9805
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1536,1,0,0.6883
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,3072,1,0,1.9857
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1024,1,0,0.4535
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,8192,1,0,5.2244
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,6144,1,0,3.8872
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,4096,1,0,2.7058
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,16384,1,0,11.8937
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,12288,1,0,8.6580
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,0,0.1118
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,16,1,0,0.0997
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,32,1,0,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,32768,1,0,27.1459
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,10240,1,0,6.8677
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,64,1,0,0.1115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,128,1,0,0.1471
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,256,1,0,0.2360
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,512,1,0,0.4107
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,2048,1,0,1.8663
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,3072,1,0,3.6272
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1536,1,0,1.2931
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,6144,1,0,7.9435
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,8192,1,0,10.9986
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,10240,1,0,13.9699
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,12288,1,0,17.3095
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,0,0.1135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,16384,1,0,23.8857
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1024,1,0,0.8115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,4096,1,0,4.9745
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,32,1,0,0.1134
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,64,1,0,0.1519
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,128,1,0,0.2286
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,32768,1,0,54.6030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,16,1,0,0.1039
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,256,1,0,0.3910
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1536,1,0,2.4771
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1024,1,0,1.5338
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,3072,1,0,7.3759
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,512,1,0,0.7281
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,4096,1,0,10.2169
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,6144,1,0,15.7031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,8192,1,0,22.0013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,2048,1,0,3.5555
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,0,0.1097
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,12288,1,0,34.8495
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,10240,1,0,28.4120
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,16,1,0,0.1185
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,32,1,0,0.1470
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,64,1,0,0.2262
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,128,1,0,0.3760
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,256,1,0,0.6895
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,16384,1,0,48.2330
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,512,1,0,1.3697
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1024,1,0,2.9218
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1536,1,0,5.1987
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,2048,1,0,7.7241
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,32768,1,0,109.9890
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,4096,1,0,20.0094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,3072,1,0,14.3238
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,6144,1,0,32.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,0,0.1118
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,32,1,0,0.2282
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,16,1,0,0.1465
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,10240,1,0,56.5743
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,8192,1,0,44.3588
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,64,1,0,0.3771
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,128,1,0,0.6669
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,256,1,0,1.2928
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1024,1,0,6.3560
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,12288,1,0,69.6662
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,16384,1,0,97.5511
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1536,1,0,10.8143
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,512,1,0,2.5838
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,3072,1,0,29.2224
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,16,1,0,0.2307
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,2048,1,0,15.8544
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,64,1,0,0.6714
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,4096,1,0,40.6462
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1,1,0,0.1014
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,32,1,0,0.3751
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,128,1,0,1.2483
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,256,1,0,2.4107
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,512,1,0,5.7938
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,6144,1,0,63.8931
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,8192,1,0,89.2541
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1024,1,0,13.2235
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1536,1,0,22.1737
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1,1,0,0.1015
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,16,1,0,0.3769
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,2048,1,0,32.1308
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,32,1,0,0.6698
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,64,1,0,1.2506
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,128,1,0,2.3564
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,3072,1,0,58.1789
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,512,1,0,11.9624
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,256,1,0,5.4015
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,4096,1,0,81.8780
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,16,1,0,0.6672
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1024,1,0,26.7459
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,64,1,0,2.3666
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,32,1,0,1.2470
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1,1,0,0.1140
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,128,1,0,5.1987
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1536,1,0,44.4153
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,2048,1,0,64.5766
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,256,1,0,11.1451
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,16,1,0,1.2566
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,1,1,0,0.1529
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,32,1,0,2.3248
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,512,1,0,24.2279
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1024,1,0,53.8705
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,64,1,0,5.2298
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,128,1,0,10.8077
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,256,1,0,22.4536
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,16,1,0,0.1074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,32,1,0,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,64,1,0,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,512,1,0,0.2424
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,512,1,0,49.0580
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,256,1,0,0.1502
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,128,1,0,0.1115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,1024,1,0,0.4469
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,2048,1,0,0.9626
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,1,1,0,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,1536,1,0,0.6809
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,3072,1,0,1.9830
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,8192,1,0,5.1740
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,4096,1,0,2.6960
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,6144,1,0,3.8595
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,16384,1,0,11.7372
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,10240,1,0,6.7150
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,1,1,0,0.1075
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,12288,1,0,8.4402
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,64,1,0,0.1120
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,32768,1,0,26.5640
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,16,1,0,0.0957
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,32,1,0,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,256,1,0,0.2327
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,512,1,0,0.4050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,1536,1,0,1.2661
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,128,1,0,0.1443
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,3072,1,0,3.6277
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,2048,1,0,1.8364
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,8192,1,0,10.5877
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,10240,1,0,13.5514
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,6144,1,0,7.5887
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,12288,1,0,16.8407
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,1024,1,0,0.8004
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,4096,1,0,4.9808
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,1,1,0,0.1097
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,16384,1,0,23.4485
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,32,1,0,0.1123
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,16,1,0,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,256,1,0,0.3854
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,512,1,0,0.7155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,128,1,0,0.2254
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,2048,1,0,3.4707
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,1536,1,0,2.4450
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,1024,1,0,1.5330
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,32768,1,0,53.9477
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,4096,1,0,9.8668
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,3072,1,0,7.1571
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,6144,1,0,15.4293
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,64,1,0,0.1442
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,10240,1,0,27.7100
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,12288,1,0,34.4348
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,1,1,0,0.1074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,16,1,0,0.1119
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,8192,1,0,21.5110
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,16384,1,0,47.6647
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,64,1,0,0.2228
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,32,1,0,0.1449
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,256,1,0,0.6800
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,128,1,0,0.3708
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,1024,1,0,2.8607
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,1536,1,0,4.9437
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,512,1,0,1.3585
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,3072,1,0,14.0023
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,32768,1,0,108.3831
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,2048,1,0,7.3776
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,4096,1,0,19.7485
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,6144,1,0,31.4891
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,1,1,0,0.1100
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,8192,1,0,43.6605
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,16,1,0,0.1439
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,32,1,0,0.2245
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,64,1,0,0.3713
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,10240,1,0,56.0664
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,128,1,0,0.6529
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,256,1,0,1.2712
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,12288,1,0,69.4510
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,1024,1,0,6.1187
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,1536,1,0,10.4227
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,2048,1,0,15.4664
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,16384,1,0,96.8497
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,512,1,0,2.5499
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,3072,1,0,28.4655
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,4096,1,0,40.0673
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,16,1,0,0.2245
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,1,1,0,0.0957
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,32,1,0,0.3710
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,64,1,0,0.6567
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,128,1,0,1.2344
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,6144,1,0,63.7922
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,512,1,0,5.5041
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,256,1,0,2.3715
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,1024,1,0,12.8031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,8192,1,0,88.4810
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,1,1,0,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,2048,1,0,31.7351
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,32,1,0,0.6535
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,16,1,0,0.3715
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,1536,1,0,21.6457
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,64,1,0,1.2325
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,3072,1,0,57.3882
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,256,1,0,5.1259
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,128,1,0,2.2887
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,512,1,0,11.5316
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,4096,1,0,81.1390
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,1,1,0,0.1115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,32,1,0,1.2337
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,1024,1,0,26.2324
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,16,1,0,0.6521
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,64,1,0,2.2830
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,128,1,0,4.9419
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,2048,1,0,63.8233
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,1536,1,0,44.0872
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,256,1,0,10.7074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,1,1,0,0.1488
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,16,1,0,1.2319
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,32,1,0,2.2888
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,64,1,0,4.9440
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,1024,1,0,52.9782
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,512,1,0,23.7922
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,1,1,0,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,128,1,0,10.4405
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,16,1,0,0.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,256,1,0,22.0288
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,128,1,0,0.1093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,32,1,0,0.0949
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,256,1,0,0.1468
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,512,1,0,0.2367
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,1024,1,0,0.4390
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,512,1,0,48.0721
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,64,1,0,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,3072,1,0,1.9549
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,1536,1,0,0.6670
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,2048,1,0,0.9480
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,6144,1,0,3.7967
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,4096,1,0,2.6289
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,8192,1,0,5.1488
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,12288,1,0,8.2542
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,10240,1,0,6.6058
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,16,1,0,0.0934
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,32768,1,0,26.1232
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,1,1,0,0.1060
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,16384,1,0,11.3853
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,256,1,0,0.2279
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,128,1,0,0.1421
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,64,1,0,0.1104
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,32,1,0,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,1024,1,0,0.7822
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,1536,1,0,1.2528
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,512,1,0,0.3983
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,2048,1,0,1.7956
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,4096,1,0,4.9003
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,3072,1,0,3.5441
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,8192,1,0,10.5782
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,10240,1,0,13.5283
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,12288,1,0,16.4027
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,1,1,0,0.1138
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,16,1,0,0.0973
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,16384,1,0,23.1287
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,6144,1,0,7.5047
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,64,1,0,0.1442
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,128,1,0,0.2172
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,32768,1,0,52.7023
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,256,1,0,0.3757
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,32,1,0,0.1091
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,1024,1,0,1.4866
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,512,1,0,0.6962
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,3072,1,0,6.9337
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,2048,1,0,3.4292
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,4096,1,0,9.6921
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,1536,1,0,2.3786
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,8192,1,0,21.1556
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,1,1,0,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,12288,1,0,33.4956
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,6144,1,0,14.9686
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,32,1,0,0.1401
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,10240,1,0,27.2387
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,16,1,0,0.1120
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,16384,1,0,46.2071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,64,1,0,0.2183
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,128,1,0,0.3610
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,1024,1,0,2.8114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,512,1,0,1.3240
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,1536,1,0,4.7574
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,3072,1,0,13.6703
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,4096,1,0,19.1895
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,2048,1,0,7.1529
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,256,1,0,0.6629
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,6144,1,0,30.8693
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,32768,1,0,106.3917
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,8192,1,0,42.5999
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,1,1,0,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,32,1,0,0.2154
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,16,1,0,0.1401
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,10240,1,0,54.0308
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,64,1,0,0.3631
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,128,1,0,0.6380
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,12288,1,0,67.5675
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,256,1,0,1.2433
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,1024,1,0,5.9129
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,1536,1,0,10.1392
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,512,1,0,2.4956
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,16384,1,0,93.7607
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,2048,1,0,15.0138
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,16,1,0,0.2166
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,3072,1,0,27.7808
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,1,1,0,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,64,1,0,0.6370
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,4096,1,0,39.0366
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,128,1,0,1.2018
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,256,1,0,2.3076
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,512,1,0,5.2189
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,1024,1,0,12.3278
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,6144,1,0,62.0152
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,8192,1,0,87.1024
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,32,1,0,0.3619
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,1536,1,0,20.8129
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,2048,1,0,30.6763
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,1,1,0,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,16,1,0,0.3643
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,64,1,0,1.1994
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,256,1,0,4.8992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,32,1,0,0.6327
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,3072,1,0,55.5629
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,512,1,0,11.1604
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,128,1,0,2.2157
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,4096,1,0,78.4093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,1024,1,0,25.1623
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,32,1,0,1.1995
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,1,1,0,0.1097
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,64,1,0,2.2185
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,1536,1,0,41.9362
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,16,1,0,0.6348
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,256,1,0,10.2182
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,128,1,0,4.7025
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,32,1,0,2.2273
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,2048,1,0,62.0431
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,512,1,0,22.7007
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,16,1,0,1.2003
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,64,1,0,4.7182
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,1,1,0,0.1461
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,16,1,0,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,256,1,0,21.0243
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,1024,1,0,51.0934
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,128,1,0,9.9111
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,64,1,0,0.0961
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,32,1,0,0.0931
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,128,1,0,0.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,512,1,0,46.3021
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,1,1,0,0.0952
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,256,1,0,0.1421
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,1024,1,0,0.4329
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,512,1,0,0.2343
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,2048,1,0,0.9410
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,1536,1,0,0.6682
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,6144,1,0,3.7909
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,10240,1,0,6.5379
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,4096,1,0,2.6717
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,12288,1,0,8.1469
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,8192,1,0,5.0547
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,3072,1,0,1.9254
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,1,1,0,0.1074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,32768,1,0,25.9978
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,16384,1,0,11.2815
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,64,1,0,0.1078
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,128,1,0,0.1403
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,16,1,0,0.0928
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,256,1,0,0.2203
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,512,1,0,0.3925
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,32,1,0,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,2048,1,0,1.7803
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,1024,1,0,0.7734
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,4096,1,0,4.9276
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,3072,1,0,3.5549
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,1536,1,0,1.2302
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,8192,1,0,10.4194
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,12288,1,0,16.4219
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,10240,1,0,13.3264
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,6144,1,0,7.3800
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,1,1,0,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,32,1,0,0.1093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,64,1,0,0.1400
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,16384,1,0,22.8986
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,16,1,0,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,512,1,0,0.6900
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,128,1,0,0.2151
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,1024,1,0,1.4701
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,3072,1,0,6.8979
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,256,1,0,0.3687
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,32768,1,0,52.4419
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,2048,1,0,3.3954
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,1536,1,0,2.3500
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,6144,1,0,14.9057
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,4096,1,0,9.4987
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,1,1,0,0.1030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,10240,1,0,26.8853
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,8192,1,0,21.0107
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,16,1,0,0.1074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,32,1,0,0.1419
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,512,1,0,1.3038
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,64,1,0,0.2173
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,1024,1,0,2.7525
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,128,1,0,0.3599
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,12288,1,0,33.3651
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,16384,1,0,46.2320
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,256,1,0,0.6523
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,1536,1,0,4.7580
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,2048,1,0,7.0809
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,32768,1,0,105.0935
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,4096,1,0,19.0305
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,3072,1,0,13.4834
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,1,1,0,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,8192,1,0,42.4256
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,32,1,0,0.2157
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,6144,1,0,30.4313
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,16,1,0,0.1399
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,64,1,0,0.3574
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,12288,1,0,66.4768
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,256,1,0,1.2205
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,10240,1,0,54.2500
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,1024,1,0,5.7275
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,512,1,0,2.4497
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,128,1,0,0.6308
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,16384,1,0,92.9670
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,1536,1,0,10.1136
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,2048,1,0,14.7954
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,16,1,0,0.2175
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,1,1,0,0.0930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,4096,1,0,38.8190
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,64,1,0,0.6280
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,128,1,0,1.1767
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,6144,1,0,61.0514
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,32,1,0,0.3582
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,3072,1,0,27.2571
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,256,1,0,2.2798
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,512,1,0,5.1648
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,8192,1,0,85.4457
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,1536,1,0,20.4913
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,16,1,0,0.3572
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,1,1,0,0.0954
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,2048,1,0,30.0460
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,1024,1,0,12.2120
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,64,1,0,1.1809
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,32,1,0,0.6267
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,3072,1,0,54.8341
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,128,1,0,2.1904
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,256,1,0,4.7731
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,4096,1,0,77.0827
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,512,1,0,10.9747
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,16,1,0,0.6280
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,1024,1,0,24.6339
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,32,1,0,1.1806
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,1,1,0,0.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,64,1,0,2.1804
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,128,1,0,4.6157
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,1536,1,0,41.1422
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,2048,1,0,60.4275
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,256,1,0,10.0894
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,512,1,0,22.2575
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,16,1,0,1.1789
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,1,1,0,0.1410
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,64,1,0,4.5971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,128,1,0,9.7447
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,1024,1,0,49.9506
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,32,1,0,2.1930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,256,1,0,20.4199
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,32,1,0,0.1503
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1,1,0,0.1819
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,16,1,0,0.1592
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,512,1,0,44.7854
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,128,1,0,0.1812
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,256,1,0,0.2445
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,512,1,0,0.3664
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1024,1,0,0.6895
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1536,1,0,1.0275
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,2048,1,0,1.3962
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,3072,1,0,2.5729
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,4096,1,0,3.4692
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,64,1,0,0.1557
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,8192,1,0,6.5626
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,10240,1,0,8.5440
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1,1,0,0.1870
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,16384,1,0,14.2428
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,16,1,0,0.1487
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,12288,1,0,10.5213
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,64,1,0,0.1835
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,32768,1,0,32.4553
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,6144,1,0,4.9666
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,32,1,0,0.1567
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,128,1,0,0.2429
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1024,1,0,1.2898
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,2048,1,0,2.6644
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1536,1,0,1.9636
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,3072,1,0,4.8489
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,512,1,0,0.6630
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,4096,1,0,6.4796
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,8192,1,0,13.3872
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,256,1,0,0.3595
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,12288,1,0,20.7451
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,10240,1,0,16.9793
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1,1,0,0.1801
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,6144,1,0,9.8090
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,16,1,0,0.1561
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,64,1,0,0.2439
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,128,1,0,0.3583
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,16384,1,0,28.9358
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,512,1,0,1.2360
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,32,1,0,0.1819
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1024,1,0,2.4648
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,256,1,0,0.6517
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,32768,1,0,64.2580
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1536,1,0,3.7928
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,2048,1,0,5.2407
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,3072,1,0,9.4713
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,4096,1,0,12.6677
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,8192,1,0,26.7070
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1,1,0,0.1586
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,6144,1,0,19.2075
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,16,1,0,0.1857
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,10240,1,0,34.6258
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,16384,1,0,58.6373
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,32,1,0,0.2416
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,64,1,0,0.3636
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,12288,1,0,42.1524
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,128,1,0,0.6479
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,512,1,0,2.3725
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,256,1,0,1.2104
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1536,1,0,7.6701
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,2048,1,0,10.5539
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1024,1,0,4.7874
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,3072,1,0,18.0796
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,32768,1,0,132.3607
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,4096,1,0,24.9485
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,16,1,0,0.2451
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1,1,0,0.1856
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,6144,1,0,40.0430
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,8192,1,0,54.5410
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,32,1,0,0.3547
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,64,1,0,0.6458
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,128,1,0,1.2104
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,10240,1,0,70.0806
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,256,1,0,2.3207
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,512,1,0,4.5668
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,12288,1,0,86.3552
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1024,1,0,9.7135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1536,1,0,15.3545
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,2048,1,0,21.8177
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,1,1,0,0.1539
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,3072,1,0,37.3865
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,16384,1,0,119.8875
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,16,1,0,0.3610
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,32,1,0,0.6487
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,4096,1,0,51.4633
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,64,1,0,1.2105
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,128,1,0,2.3013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,256,1,0,4.4640
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,6144,1,0,80.0511
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,512,1,0,9.4630
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,1024,1,0,19.9017
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,1,1,0,0.1688
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,8192,1,0,114.6287
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,1536,1,0,31.7049
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,16,1,0,0.6559
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,32,1,0,1.2042
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,2048,1,0,44.0276
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,64,1,0,2.3103
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,3072,1,0,75.0181
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,256,1,0,9.2436
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,512,1,0,19.3409
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,128,1,0,4.4375
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,1,1,0,0.2067
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,4096,1,0,102.0793
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,16,1,0,1.2219
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,1024,1,0,40.5988
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,32,1,0,2.3053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,64,1,0,4.4530
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,128,1,0,9.0374
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,1536,1,0,63.7431
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,256,1,0,18.6283
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,2048,1,0,91.9569
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,32,1,0,4.4410
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,16,1,0,2.3387
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,64,1,0,9.0857
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,512,1,0,39.3746
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,128,1,0,18.4967
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,1024,1,0,83.6802
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,16,1,0,0.1397
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,32,1,0,0.1259
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,256,1,0,38.3352
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,64,1,0,0.1512
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1,1,0,0.1455
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,128,1,0,0.1706
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,256,1,0,0.2153
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,512,1,0,0.3411
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1024,1,0,0.6673
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1536,1,0,1.1114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,512,1,0,79.7412
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,3072,1,0,3.1808
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,2048,1,0,1.6387
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,6144,1,0,6.5590
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,4096,1,0,4.3516
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,8192,1,0,8.8650
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,12288,1,0,14.0814
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,10240,1,0,11.4905
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,16,1,0,0.1274
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,16384,1,0,19.2052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1,1,0,0.1495
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,32,1,0,0.1388
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,64,1,0,0.1755
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,128,1,0,0.2052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,32768,1,0,41.8322
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,256,1,0,0.3222
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,512,1,0,0.5998
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1024,1,0,1.2940
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1536,1,0,2.1825
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,3072,1,0,6.0399
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,4096,1,0,8.3837
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,2048,1,0,3.2216
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,6144,1,0,13.0317
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,8192,1,0,17.9651
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,10240,1,0,22.7165
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1,1,0,0.1558
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,12288,1,0,27.8372
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,16,1,0,0.1349
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,32,1,0,0.1536
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,64,1,0,0.2042
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,16384,1,0,38.1814
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,128,1,0,0.3026
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,256,1,0,0.5525
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,512,1,0,1.1591
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1024,1,0,2.5524
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,2048,1,0,6.2717
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,32768,1,0,83.6874
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,3072,1,0,11.8458
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,4096,1,0,16.5156
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,8192,1,0,35.6142
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,6144,1,0,25.7563
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1,1,0,0.1517
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,10240,1,0,45.8242
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,16,1,0,0.1541
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,64,1,0,0.3009
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,12288,1,0,55.7637
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,32,1,0,0.2082
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,128,1,0,0.5112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,16384,1,0,76.7188
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,256,1,0,1.0568
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,512,1,0,2.2731
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1024,1,0,4.9191
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1536,1,0,8.5240
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,3072,1,0,22.9605
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,4096,1,0,32.4085
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,2048,1,0,12.6851
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,32768,1,0,168.2088
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,6144,1,0,51.7915
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1,1,0,0.1385
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,8192,1,0,71.4708
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,16,1,0,0.2069
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,32,1,0,0.3031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,64,1,0,0.5116
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,10240,1,0,91.2211
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,128,1,0,0.9801
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,256,1,0,2.0508
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,512,1,0,4.3652
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1024,1,0,9.9969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,12288,1,0,111.6933
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1536,1,0,17.1968
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,2048,1,0,25.6622
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1,1,0,0.1317
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1536,1,0,4.2649
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,3072,1,0,46.3049
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,16384,1,0,153.5607
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,16,1,0,0.3039
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,32,1,0,0.5133
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,4096,1,0,65.4493
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,64,1,0,0.9799
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,128,1,0,1.8965
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,256,1,0,3.9152
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,512,1,0,8.8182
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,6144,1,0,103.5416
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1024,1,0,20.2549
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,1,1,0,0.1408
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1536,1,0,34.7776
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,16,1,0,0.5125
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,32,1,0,0.9792
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,8192,1,0,143.2359
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,2048,1,0,51.5928
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,64,1,0,1.8993
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,128,1,0,3.6454
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,256,1,0,7.9688
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,3072,1,0,92.4929
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,512,1,0,17.9492
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,1,1,0,0.1654
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,1024,1,0,40.8569
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,16,1,0,0.9893
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,4096,1,0,130.4576
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,32,1,0,1.8893
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,128,1,0,7.4302
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,64,1,0,3.6257
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,1536,1,0,69.6043
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,256,1,0,16.2148
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,1,1,0,0.2301
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,32,1,0,3.6146
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,16,1,0,1.9263
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,512,1,0,36.5340
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,2048,1,0,103.4292
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,64,1,0,7.3835
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,128,1,0,15.1295
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1,1,0,0.1206
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,16,1,0,0.1298
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,256,1,0,33.0449
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,64,1,0,0.1155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,1024,1,0,81.8132
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,128,1,0,0.1342
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,256,1,0,0.1780
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,32,1,0,0.1093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,512,1,0,0.2795
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1024,1,0,0.5020
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,2048,1,0,1.0910
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,512,1,0,72.8167
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,3072,1,0,2.1959
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,4096,1,0,3.0725
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1536,1,0,0.7748
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,8192,1,0,6.0017
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,10240,1,0,7.8407
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,6144,1,0,4.5023
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,12288,1,0,9.6606
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,16,1,0,0.1116
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,16384,1,0,13.1715
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,64,1,0,0.1324
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1,1,0,0.1318
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,32,1,0,0.1175
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,32768,1,0,29.3455
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,128,1,0,0.1749
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,2048,1,0,2.1805
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,256,1,0,0.2707
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,3072,1,0,4.2456
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1536,1,0,1.4808
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,4096,1,0,5.8004
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,6144,1,0,8.8685
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,8192,1,0,12.2615
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,10240,1,0,15.4372
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1,1,0,0.1256
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,16,1,0,0.1152
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,32,1,0,0.1335
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,16384,1,0,26.1284
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,64,1,0,0.1748
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,128,1,0,0.2588
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,256,1,0,0.4434
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,32768,1,0,59.0828
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,512,1,0,0.8484
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1024,1,0,1.8465
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1536,1,0,3.0099
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,3072,1,0,8.2733
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,2048,1,0,4.2783
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,4096,1,0,11.3488
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,8192,1,0,24.1668
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,12288,1,0,38.0644
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,10240,1,0,31.0765
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,16384,1,0,52.5096
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1,1,0,0.1215
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,16,1,0,0.1320
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,32,1,0,0.1748
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,64,1,0,0.2674
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,128,1,0,0.4240
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,256,1,0,0.7983
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,512,1,0,1.6946
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1024,1,0,3.5787
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1536,1,0,6.0293
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,32768,1,0,118.5532
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,2048,1,0,8.6717
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,3072,1,0,15.7978
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,4096,1,0,22.0483
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,6144,1,0,35.1552
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1,1,0,0.1278
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,32,1,0,0.2589
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,64,1,0,0.4243
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,12288,1,0,75.8643
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,128,1,0,0.7619
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,256,1,0,1.6027
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,16384,1,0,105.3501
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,512,1,0,3.2984
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1024,1,0,7.3207
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,2048,1,0,17.6303
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,3072,1,0,31.8870
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,4096,1,0,44.5796
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1,1,0,0.1135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,16,1,0,0.2614
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,32,1,0,0.4233
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,64,1,0,0.7621
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,8192,1,0,97.0612
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,128,1,0,1.5042
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,256,1,0,3.0906
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,512,1,0,6.7340
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1024,1,0,14.8858
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1536,1,0,24.7211
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,2048,1,0,35.5049
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1,1,0,0.1176
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,10240,1,0,61.8045
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,3072,1,0,63.4976
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,16,1,0,0.4265
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,16,1,0,0.1751
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,8192,1,0,48.6204
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,32,1,0,0.7596
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,64,1,0,1.5027
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,128,1,0,2.9308
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,4096,1,0,89.3315
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,512,1,0,13.7195
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,256,1,0,6.2879
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1024,1,0,30.0133
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,16,1,0,0.7632
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,32,1,0,1.5284
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,64,1,0,2.9384
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1536,1,0,49.4446
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,128,1,0,6.0188
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,2048,1,0,71.0996
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,256,1,0,12.8014
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,1,1,0,0.1872
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,6144,1,0,69.8128
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,16,1,0,1.5204
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,512,1,0,27.8522
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,32,1,0,2.9412
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,64,1,0,5.9630
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,128,1,0,12.2614
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,16,1,0,0.1205
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,1024,1,0,60.1243
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1,1,0,0.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,256,1,0,25.8014
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,64,1,0,0.1065
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,32,1,0,0.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,128,1,0,0.1170
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,512,1,0,0.2444
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,512,1,0,55.7127
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,256,1,0,0.1576
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1024,1,0,0.4241
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,2048,1,0,0.8379
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,3072,1,0,1.7223
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1536,1,0,0.6204
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,4096,1,0,2.3463
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,6144,1,0,3.3544
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,8192,1,0,4.5855
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,10240,1,0,5.9932
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1,1,0,0.1154
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,16384,1,0,10.1882
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,12288,1,0,7.4096
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,16,1,0,0.1051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,32768,1,0,23.3239
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,128,1,0,0.1576
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,64,1,0,0.1199
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,256,1,0,0.2386
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,512,1,0,0.4032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,32,1,0,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1024,1,0,0.7526
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1536,1,0,1.1561
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,2048,1,0,1.6104
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,6144,1,0,6.8429
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,8192,1,0,9.3996
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,3072,1,0,3.2337
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,10240,1,0,11.8339
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,4096,1,0,4.4964
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1,1,0,0.1215
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,1,1,0,0.1384
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,16,1,0,0.1051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,32,1,0,0.1176
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,16384,1,0,20.1466
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,64,1,0,0.1542
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,32768,1,0,46.9619
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,128,1,0,0.2310
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,512,1,0,0.7131
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1536,1,0,2.2568
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,2048,1,0,3.2365
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1024,1,0,1.4313
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,256,1,0,0.3906
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,4096,1,0,8.7862
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,6144,1,0,13.3040
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,3072,1,0,6.4995
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,8192,1,0,18.3802
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1,1,0,0.1194
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,12288,1,0,29.4237
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,16,1,0,0.1188
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,16384,1,0,40.5004
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,10240,1,0,23.7550
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,128,1,0,0.3813
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,64,1,0,0.2323
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,32,1,0,0.1543
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,256,1,0,0.6834
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,512,1,0,1.3570
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,32768,1,0,93.6755
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,4096,1,0,16.9530
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,3072,1,0,12.2355
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1536,1,0,4.8015
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1024,1,0,2.8718
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,2048,1,0,6.7161
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,6144,1,0,26.7275
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1,1,0,0.1176
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,16,1,0,0.1536
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,8192,1,0,36.9603
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,32,1,0,0.2372
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,64,1,0,0.3806
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,128,1,0,0.6708
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,12288,1,0,58.4064
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,256,1,0,1.3046
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,10240,1,0,47.1726
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,512,1,0,2.7368
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1024,1,0,5.9717
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,2048,1,0,13.6549
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,16384,1,0,81.5242
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,3072,1,0,24.6786
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1,1,0,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,4096,1,0,34.1976
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,64,1,0,0.6644
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,32,1,0,0.3816
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,6144,1,0,53.2292
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,128,1,0,1.2603
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,256,1,0,2.6196
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,512,1,0,5.6411
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,8192,1,0,74.3995
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1024,1,0,12.2874
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1536,1,0,19.5578
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,2048,1,0,27.4851
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1,1,0,0.1084
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,16,1,0,0.3828
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,32,1,0,0.6689
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,3072,1,0,49.1481
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,64,1,0,1.2657
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,128,1,0,2.5280
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,256,1,0,5.4668
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,4096,1,0,68.3888
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,512,1,0,11.6110
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,1,1,0,0.1212
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1024,1,0,24.6104
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,32,1,0,1.2630
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,16,1,0,0.6630
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,64,1,0,2.5550
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,128,1,0,5.2802
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1536,1,0,39.4466
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,2048,1,0,55.2962
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,256,1,0,11.1281
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,1,1,0,0.1609
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,512,1,0,23.3857
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,32,1,0,2.5489
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,16,1,0,1.2654
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,64,1,0,5.2799
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,128,1,0,10.8500
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,1024,1,0,49.7031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,256,1,0,22.5045
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,16,1,0,0.1133
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,64,1,0,0.1035
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,32,1,0,0.1021
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1,1,0,0.1022
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,512,1,0,47.0259
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,128,1,0,0.1155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,512,1,0,0.2353
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1024,1,0,0.4059
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1536,1,0,0.5956
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,3072,1,0,1.6904
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,6144,1,0,3.2056
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,4096,1,0,2.2567
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,16,1,0,0.2366
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,2048,1,0,0.8108
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,8192,1,0,4.2150
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,10240,1,0,5.6056
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,12288,1,0,7.1347
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,16,1,0,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,16384,1,0,9.8909
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,64,1,0,0.1134
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1,1,0,0.1115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,32,1,0,0.1039
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,256,1,0,0.2230
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,32768,1,0,22.7673
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,512,1,0,0.3827
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,128,1,0,0.1477
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1024,1,0,0.7241
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1536,1,0,1.1115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,2048,1,0,1.5398
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,3072,1,0,3.0729
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,6144,1,0,6.4428
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,4096,1,0,4.1656
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,10240,1,0,11.4979
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,8192,1,0,9.0382
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1,1,0,0.1167
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,12288,1,0,14.1760
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,16,1,0,0.1050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,16384,1,0,19.7050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,32,1,0,0.1136
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,64,1,0,0.1482
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,512,1,0,0.6760
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,128,1,0,0.2221
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,256,1,0,0.3725
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1536,1,0,2.1014
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,32768,1,0,45.8846
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,2048,1,0,2.9174
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1024,1,0,1.3621
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,3072,1,0,6.1548
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,4096,1,0,8.4488
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,6144,1,0,12.9030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,8192,1,0,17.9268
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1,1,0,0.1138
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,10240,1,0,23.1456
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,16,1,0,0.1141
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,32,1,0,0.1482
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,12288,1,0,28.4919
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,64,1,0,0.2257
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,128,1,0,0.3622
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,16384,1,0,39.5714
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,256,1,0,0.6541
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,512,1,0,1.2776
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1536,1,0,4.4312
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,256,1,0,0.1503
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1024,1,0,2.5660
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,32768,1,0,91.4271
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,2048,1,0,6.3925
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,3072,1,0,11.8332
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,4096,1,0,16.4836
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1,1,0,0.1172
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,8192,1,0,36.0914
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,16,1,0,0.1481
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,6144,1,0,26.0276
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,32,1,0,0.2204
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,64,1,0,0.3658
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,128,1,0,0.6370
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,12288,1,0,57.0234
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,256,1,0,1.2362
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,10240,1,0,45.8000
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,16384,1,0,79.0478
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1024,1,0,5.6375
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,512,1,0,2.3945
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1536,1,0,9.2580
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,2048,1,0,13.1974
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,3072,1,0,23.9454
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1,1,0,0.1034
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,32,1,0,0.3613
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,4096,1,0,33.2128
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,64,1,0,0.6377
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,16,1,0,0.2220
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,128,1,0,1.1972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,256,1,0,2.2845
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,6144,1,0,51.7481
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,512,1,0,5.2932
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1024,1,0,11.7517
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,8192,1,0,72.0423
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,1,1,0,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1536,1,0,18.8974
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,2048,1,0,26.4973
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,16,1,0,0.3633
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,32,1,0,0.6368
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,64,1,0,1.1993
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,512,1,0,11.0290
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,128,1,0,2.2318
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,256,1,0,5.1161
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,4096,1,0,66.1707
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,1024,1,0,23.6933
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,1,1,0,0.1157
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,16,1,0,0.6373
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,64,1,0,2.2129
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,32,1,0,1.1962
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,128,1,0,4.9656
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,2048,1,0,52.8591
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,256,1,0,10.7075
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,1,1,0,0.1526
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,512,1,0,22.3501
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,32,1,0,2.2233
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,64,1,0,4.9746
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,1024,1,0,47.1971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,128,1,0,10.3875
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,256,1,0,21.6265
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1,16,1,0,0.1120
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,512,1,0,44.3388
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1,32,1,0,0.0975
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1,1,1,0,0.1013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1,64,1,0,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1,128,1,0,0.1217
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1,512,1,0,0.2317
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1,1024,1,0,0.4013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1,2048,1,0,0.7938
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1,3072,1,0,1.6757
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1,1536,1,0,0.5866
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1,4096,1,0,2.2663
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1,6144,1,0,3.1307
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1,10240,1,0,5.5203
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1,8192,1,0,4.1844
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1,12288,1,0,6.8463
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1,16384,1,0,9.6003
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,2,1,1,0,0.1100
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,2,16,1,0,0.0990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,2,64,1,0,0.1138
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,2,256,1,0,0.2215
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,2,128,1,0,0.1486
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,2,512,1,0,0.3782
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,2,1536,1,0,1.0795
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,2,2048,1,0,1.5095
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,2,3072,1,0,3.0123
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,2,4096,1,0,4.1077
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,2,6144,1,0,6.2443
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,2,8192,1,0,8.7193
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,2,10240,1,0,11.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,2,16384,1,0,19.2207
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,4,1,1,0,0.1115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,4,16,1,0,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,4,32,1,0,0.1137
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,2,32768,1,0,45.0922
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,4,64,1,0,0.1482
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,4,128,1,0,0.2177
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,4,256,1,0,0.3666
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,4,1024,1,0,1.3285
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,4,2048,1,0,2.8577
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,4,1536,1,0,2.0634
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,4,3072,1,0,5.8801
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,4,4096,1,0,8.0988
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,4,6144,1,0,12.5103
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,4,8192,1,0,17.4675
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,4,10240,1,0,22.5395
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,4,12288,1,0,28.0079
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,8,1,1,0,0.1099
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,8,16,1,0,0.1135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,2,32,1,0,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,2,1024,1,0,0.7044
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,8,128,1,0,0.3584
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,4,32768,1,0,91.0535
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,8,256,1,0,0.6432
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,8,64,1,0,0.2180
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,8,512,1,0,1.2510
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,8,1024,1,0,2.4856
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,8,1536,1,0,4.2035
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,8,2048,1,0,6.0607
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,8,3072,1,0,11.4612
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,2,12288,1,0,13.7644
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,8,4096,1,0,15.9873
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,8,6144,1,0,25.5204
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,8,8192,1,0,35.4288
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,4,512,1,0,0.6606
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,8,10240,1,0,45.2129
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,16,1,1,0,0.1119
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,8,12288,1,0,56.1537
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,16,16,1,0,0.1463
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,16,32,1,0,0.2183
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,16,256,1,0,1.2063
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,8,16384,1,0,78.4734
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,16,512,1,0,2.3265
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,16,64,1,0,0.3590
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,16,1024,1,0,5.3353
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,16,2048,1,0,12.6547
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,16,1536,1,0,8.8251
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,16,3072,1,0,23.3750
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,16,4096,1,0,32.5572
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,32,1,1,0,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,32,16,1,0,0.2198
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,4,16384,1,0,39.0981
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,32,32,1,0,0.3578
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,16,6144,1,0,50.9895
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,32,64,1,0,0.6252
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,16,8192,1,0,71.4490
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,32,128,1,0,1.1732
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,32,256,1,0,2.2558
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,32,1024,1,0,11.1607
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,32,1536,1,0,18.2292
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,32,2048,1,0,25.8521
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,64,16,1,0,0.3576
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,64,1,1,0,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,64,32,1,0,0.6258
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,64,64,1,0,1.1705
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,64,128,1,0,2.1702
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,32,4096,1,0,65.3624
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,64,256,1,0,4.8634
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,64,512,1,0,10.5406
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,128,1,1,0,0.1143
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,64,1024,1,0,22.9690
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,128,16,1,0,0.6255
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,64,1536,1,0,36.9147
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,128,32,1,0,1.1732
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,128,64,1,0,2.1644
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,64,2048,1,0,52.3848
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,128,128,1,0,4.6906
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,128,256,1,0,10.1437
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,128,512,1,0,21.5643
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,256,16,1,0,1.1772
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,256,32,1,0,2.1708
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,256,1,1,0,0.1463
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,128,1024,1,0,46.3651
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,256,128,1,0,9.8704
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,256,256,1,0,20.8836
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1,16,1,0,0.1097
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1,1,1,0,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1,32,1,0,0.1759
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,256,512,1,0,43.8871
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1,64,1,0,0.0993
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1,128,1,0,0.1119
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1,256,1,0,0.1442
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1,512,1,0,0.2255
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1,1536,1,0,0.5748
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1,2048,1,0,0.7812
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1,3072,1,0,1.6341
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1,4096,1,0,2.2238
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1,6144,1,0,3.1021
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1,10240,1,0,5.3707
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1,16384,1,0,9.3905
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,2,1,1,0,0.1108
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1,32768,1,0,21.8406
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,2,16,1,0,0.0972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,32,3072,1,0,46.7358
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,2,32,1,0,0.1015
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,2,64,1,0,0.1101
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,2,512,1,0,0.3689
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,2,256,1,0,0.2164
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,2,1024,1,0,0.6870
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,2,128,1,0,0.1415
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,2,2048,1,0,1.4799
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,2,3072,1,0,2.9739
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,2,4096,1,0,4.0369
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,2,8192,1,0,8.5513
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,256,64,1,0,4.7182
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,2,10240,1,0,10.8772
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,2,12288,1,0,13.4442
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,2,16384,1,0,18.7801
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,4,1,1,0,0.1087
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,2,32768,1,0,44.1060
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,4,32,1,0,0.1112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,4,16,1,0,0.0994
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,4,256,1,0,0.3583
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,4,128,1,0,0.2122
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1,8192,1,0,4.1071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,4,512,1,0,0.6480
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,4,1024,1,0,1.2931
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,4,1536,1,0,2.0065
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,4,2048,1,0,2.7821
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,4,6144,1,0,12.1527
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,4,8192,1,0,16.9749
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,4,10240,1,0,21.9372
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,4,12288,1,0,27.0893
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,4,16384,1,0,37.8349
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,8,16,1,0,0.1114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,8,32,1,0,0.1425
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,4,32768,1,0,88.7501
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,8,128,1,0,0.3484
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,8,256,1,0,0.6271
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,8,512,1,0,1.2112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,8,1024,1,0,2.4165
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,8,2048,1,0,5.8528
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,8,1536,1,0,4.0470
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,8,4096,1,0,15.4850
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,4,64,1,0,0.1412
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,8,6144,1,0,24.7084
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,8,8192,1,0,34.4376
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,4,4096,1,0,7.9294
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,16,1,1,0,0.1093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,8,10240,1,0,43.8322
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,16,32,1,0,0.2120
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,16,16,1,0,0.1416
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,8,12288,1,0,54.3513
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,16,64,1,0,0.3480
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,16,128,1,0,0.6056
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,8,1,1,0,0.1037
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,16,256,1,0,1.1726
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,16,512,1,0,2.2668
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,16,1024,1,0,5.1438
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,16,1536,1,0,8.5344
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,8,64,1,0,0.2142
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,16,3072,1,0,22.5124
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,16,2048,1,0,12.1535
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,16,4096,1,0,31.4266
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,16,6144,1,0,49.4595
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,32,1,1,0,0.0975
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,32,64,1,0,0.6079
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,32,32,1,0,0.3495
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,32,16,1,0,0.2115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,16,8192,1,0,69.1532
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,32,128,1,0,1.1408
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,32,256,1,0,2.1736
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,32,1024,1,0,10.7185
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,32,512,1,0,4.8051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,32,1536,1,0,17.3606
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,32,2048,1,0,24.7416
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,64,1,1,0,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,64,16,1,0,0.3510
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,64,32,1,0,0.6057
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,64,64,1,0,1.1405
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,64,256,1,0,4.6132
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,64,128,1,0,2.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,64,512,1,0,10.0468
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,32,3072,1,0,45.0806
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,64,1024,1,0,21.8151
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,128,1,1,0,0.1131
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,128,16,1,0,0.6035
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,64,1536,1,0,35.3044
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,128,32,1,0,1.1414
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,64,2048,1,0,49.7147
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,128,64,1,0,2.1069
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,128,256,1,0,9.6850
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,128,128,1,0,4.4807
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,128,512,1,0,20.5841
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,256,16,1,0,1.1388
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,256,32,1,0,2.1022
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,256,1,1,0,0.1421
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,256,64,1,0,4.4714
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,8,16384,1,0,76.4091
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,128,1024,1,0,44.2602
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,256,128,1,0,9.4168
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,256,256,1,0,19.8533
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1,16,1,0,0.1086
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1,64,1,0,0.0972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,256,512,1,0,41.2453
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1,1,1,0,0.0947
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1,128,1,0,0.1101
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1,32,1,0,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1,256,1,0,0.1421
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1,512,1,0,0.2215
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1,1536,1,0,0.5699
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1,4096,1,0,2.1876
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1,1024,1,0,0.3899
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1,2048,1,0,0.7720
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1,3072,1,0,1.6200
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1,10240,1,0,5.3270
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1,12288,1,0,6.6506
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1,16384,1,0,9.2229
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,2,16,1,0,0.0956
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,2,1,1,0,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1,32768,1,0,21.6494
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,2,32,1,0,0.0975
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,2,64,1,0,0.1116
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,2,512,1,0,0.3641
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,32,4096,1,0,63.2584
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,2,1024,1,0,0.6759
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,2,128,1,0,0.1401
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,2,256,1,0,0.2118
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,2,1536,1,0,1.0379
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,2,2048,1,0,1.4655
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,2,3072,1,0,2.9408
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,2,4096,1,0,3.9738
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,2,6144,1,0,6.0326
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,2,8192,1,0,8.4227
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,2,10240,1,0,10.7774
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,2,16384,1,0,18.5641
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,2,12288,1,0,13.4129
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,4,16,1,0,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,4,1,1,0,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,4,32,1,0,0.1116
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,4,64,1,0,0.1374
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,4,128,1,0,0.2092
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,2,32768,1,0,43.5070
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,4,256,1,0,0.3520
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,4,512,1,0,0.6386
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,4,1024,1,0,1.2774
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,4,1536,1,0,1.9754
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,4,3072,1,0,5.7212
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,4,4096,1,0,7.7943
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,4,6144,1,0,12.0706
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,4,10240,1,0,21.7150
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,4,8192,1,0,16.8134
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,4,2048,1,0,2.7504
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,4,12288,1,0,26.8052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,8,1,1,0,0.1034
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,8,64,1,0,0.2096
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,4,16384,1,0,37.5328
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,8,32,1,0,0.1398
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,8,16,1,0,0.1114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,8,256,1,0,0.6125
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,8,1536,1,0,3.9526
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,8,512,1,0,1.1999
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,8,1024,1,0,2.3908
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,4,32768,1,0,87.5485
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,8,3072,1,0,11.0073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,8,4096,1,0,15.2934
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,8,6144,1,0,24.3366
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,16,1,1,0,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,8,8192,1,0,33.9792
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,16,16,1,0,0.1410
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,8,10240,1,0,43.1927
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,8,12288,1,0,53.6141
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,16,32,1,0,0.2097
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,16,64,1,0,0.3432
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,16,128,1,0,0.5957
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,16,256,1,0,1.1561
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,8,16384,1,0,74.6660
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,16,512,1,0,2.2153
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,16,1536,1,0,8.4241
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,16,1024,1,0,4.9833
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,16,2048,1,0,12.0020
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,16,3072,1,0,22.2452
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,16,4096,1,0,30.9846
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,32,1,1,0,0.0952
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,32,16,1,0,0.2118
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,32,32,1,0,0.3426
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,16,6144,1,0,48.6282
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,32,128,1,0,1.1175
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,32,64,1,0,0.5975
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,32,256,1,0,2.1347
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,16,8192,1,0,68.1449
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,32,512,1,0,4.6057
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,32,1024,1,0,10.5543
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,32,1536,1,0,17.1241
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,32,2048,1,0,24.3735
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,64,1,1,0,0.0985
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,64,16,1,0,0.3429
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,64,32,1,0,0.5982
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,64,64,1,0,1.1183
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,32,3072,1,0,44.0906
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,8,128,1,0,0.3452
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,64,128,1,0,2.0686
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,32,4096,1,0,62.2295
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,64,256,1,0,4.4571
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,64,512,1,0,9.8984
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,128,1,1,0,0.1096
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,64,1024,1,0,21.3122
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,8,2048,1,0,5.6892
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,128,16,1,0,0.5973
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,128,32,1,0,1.1231
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,64,1536,1,0,34.2907
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,128,128,1,0,4.3602
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,128,256,1,0,9.5154
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,64,2048,1,0,48.8161
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,128,512,1,0,19.9910
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,256,16,1,0,1.1207
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,256,1,1,0,0.1402
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,128,1024,1,0,43.1072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,256,64,1,0,4.3110
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,256,256,1,0,19.2795
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,256,128,1,0,9.3213
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,16,1,0,0.1518
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,32,1,0,0.1544
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,1,1,0,0.1460
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,64,1,0,0.1625
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,256,512,1,0,40.0910
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,256,1,0,0.2204
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,128,1,0,0.1791
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,512,1,0,0.3120
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,1024,1,0,0.5770
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,1536,1,0,0.8806
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,3072,1,0,1.9960
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,4096,1,0,2.7941
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,6144,1,0,4.3750
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,8192,1,0,5.9426
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,2048,1,0,1.2372
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,12288,1,0,9.3651
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,10240,1,0,7.6728
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,1,1,0,0.1541
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,16384,1,0,12.9624
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,16,1,0,0.1561
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,32,1,0,0.1628
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,128,1,0,0.2153
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,64,1,0,0.1800
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,256,1,0,0.2958
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,32768,1,0,29.8762
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,512,1,0,0.5239
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,1024,1,0,1.0334
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,1536,1,0,1.6274
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,4096,1,0,5.3363
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,6144,1,0,8.4917
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,3072,1,0,3.8408
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,8192,1,0,11.7245
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,2048,1,0,2.3376
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,12288,1,0,18.6609
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,10240,1,0,15.2929
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,16384,1,0,26.0918
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,32,1,0,0.1798
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,1,1,0,0.1561
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,64,1,0,0.2177
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,128,1,0,0.2905
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,1024,1,0,1.9356
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,32768,1,0,59.8739
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,512,1,0,0.9354
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,256,1,0,0.4978
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,2048,1,0,4.4324
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,3072,1,0,7.4900
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,6144,1,0,16.9243
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,4096,1,0,10.5171
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,10240,1,0,30.5536
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,8192,1,0,23.6888
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,1,1,0,0.1531
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,12288,1,0,37.4962
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,16,1,0,0.1808
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,16384,1,0,52.4833
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,32,1,0,0.2190
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,64,1,0,0.2894
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,256,1,0,0.8813
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,128,1,0,0.4879
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,512,1,0,1.7442
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,1024,1,0,3.6572
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,2048,1,0,8.6901
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,3072,1,0,14.8694
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,32768,1,0,119.5620
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,1536,1,0,5.9967
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,4096,1,0,21.2618
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,6144,1,0,34.2327
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,1,1,0,0.1565
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,8192,1,0,47.3620
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,16,1,0,0.2176
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,32,1,0,0.2918
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,10240,1,0,60.9293
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,64,1,0,0.4868
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,128,1,0,0.8627
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,12288,1,0,74.5855
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,256,1,0,1.6443
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,512,1,0,3.2500
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,1024,1,0,7.1128
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,16384,1,0,104.4261
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,1536,1,0,11.9626
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,2048,1,0,17.5626
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,16,1,0,0.1621
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,3072,1,0,30.0571
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,1,1,0,0.1596
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,16,1,0,0.2919
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,4096,1,0,42.5826
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,32,1,0,0.4866
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,64,1,0,0.8642
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,1536,1,0,3.1114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,128,1,0,1.6051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,256,1,0,3.0553
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,6144,1,0,68.0398
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,512,1,0,6.3509
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,8192,1,0,94.6446
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,1536,1,0,24.1646
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,1024,1,0,14.4210
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,16,1,0,0.4920
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,1,1,0,0.1753
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,2048,1,0,35.3865
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,32,1,0,0.8646
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,128,1,0,2.9827
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,512,1,0,12.8847
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,64,1,0,1.6052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,256,1,0,5.9509
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,3072,1,0,60.0976
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,4096,1,0,84.7726
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,1,1,0,0.2053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,16,1,0,0.8754
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,32,1,0,1.6005
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,1024,1,0,29.1311
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,128,1,0,5.8340
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,1536,1,0,48.3410
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,64,1,0,2.9849
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,256,1,0,12.1184
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,2048,1,0,70.5233
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,1,1,0,0.2690
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,16,1,0,1.6349
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,512,1,0,25.8867
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,32,1,0,3.0023
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,128,1,0,11.8031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,64,1,0,5.8044
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,16,1,0,0.1237
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,256,1,0,24.4038
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,1024,1,0,57.8832
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,32,1,0,0.1258
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,64,1,0,0.1394
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,128,1,0,0.1558
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,1,1,0,0.1115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,256,1,0,0.2076
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,1024,1,0,0.7509
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,512,1,0,51.9771
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,1536,1,0,1.3580
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,3072,1,0,3.8315
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,2048,1,0,2.1299
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,4096,1,0,5.4779
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,512,1,0,0.3382
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,6144,1,0,8.9818
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,12288,1,0,19.5539
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,10240,1,0,15.9853
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,1,1,0,0.1197
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,8192,1,0,12.3490
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,16,1,0,0.1255
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,16384,1,0,26.7749
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,32,1,0,0.1381
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,64,1,0,0.1575
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,256,1,0,0.2955
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,32768,1,0,58.6705
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,512,1,0,0.5768
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,1024,1,0,1.4299
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,1536,1,0,2.6447
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,3072,1,0,7.6512
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,128,1,0,0.1929
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,2048,1,0,4.2580
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,4096,1,0,11.0033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,8192,1,0,24.7185
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,10240,1,0,32.0400
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,1,1,0,0.1195
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,6144,1,0,18.0091
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,12288,1,0,39.5462
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,16,1,0,0.1381
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,64,1,0,0.1913
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,32,1,0,0.1572
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,16384,1,0,54.7699
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,128,1,0,0.2701
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,512,1,0,1.0760
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,1024,1,0,2.8213
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,256,1,0,0.4912
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,2048,1,0,8.3417
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,4096,1,0,21.8870
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,3072,1,0,15.2426
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,6144,1,0,36.3375
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,1536,1,0,5.2199
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,32768,1,0,119.2452
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,1,1,0,0.1197
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,8192,1,0,50.7716
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,16,1,0,0.1538
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,32,1,0,0.1916
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,12288,1,0,79.3936
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,64,1,0,0.2704
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,10240,1,0,64.9533
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,256,1,0,0.8824
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,128,1,0,0.4417
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,512,1,0,2.1183
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,1024,1,0,5.4591
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,2048,1,0,16.7277
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,16384,1,0,110.2042
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,1536,1,0,10.4803
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,3072,1,0,30.7299
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,4096,1,0,44.9536
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,1,1,0,0.1246
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,16,1,0,0.1938
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,32,1,0,0.2709
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,6144,1,0,73.5342
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,64,1,0,0.4409
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,128,1,0,0.7898
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,256,1,0,1.6877
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,8192,1,0,102.0967
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,512,1,0,4.1370
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,32768,1,0,238.2303
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,1024,1,0,10.8972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,10240,1,0,130.0087
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,1536,1,0,21.1214
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,2048,1,0,34.0983
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,1,1,0,0.1296
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,12288,1,0,161.3750
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,16,1,0,0.2718
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,3072,1,0,62.0797
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,32,1,0,0.4421
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,64,1,0,0.7956
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,128,1,0,1.5016
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,256,1,0,3.1699
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,512,1,0,8.2618
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,4096,1,0,89.9749
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,16384,1,0,220.7469
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,1024,1,0,22.1842
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,1,1,0,0.1423
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,1536,1,0,42.7265
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,16,1,0,0.4445
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,32,1,0,0.7960
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,6144,1,0,147.1422
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,64,1,0,1.5022
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,2048,1,0,68.2885
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,128,1,0,2.8228
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,256,1,0,6.2624
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,8192,1,0,204.1250
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,512,1,0,16.8746
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,1,1,0,0.1691
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,16,1,0,0.7991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,3072,1,0,124.1451
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,1024,1,0,44.4837
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,32,1,0,1.4978
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,64,1,0,2.7996
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,128,1,0,5.4763
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,256,1,0,12.6725
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,1536,1,0,85.6864
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,1,1,0,0.2187
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,16,1,0,1.5135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,4096,1,0,180.0400
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,512,1,0,33.5226
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,32,1,0,2.8065
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,64,1,0,5.5113
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,128,1,0,11.2261
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,16,1,0,0.1092
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,2048,1,0,136.5315
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,32,1,0,0.1099
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,256,1,0,25.4489
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,1,1,0,0.1006
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,64,1,0,0.1173
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,1024,1,0,89.0322
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,128,1,0,0.1316
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,512,1,0,0.2570
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,1024,1,0,0.5012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,2048,1,0,1.2479
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,4096,1,0,3.1588
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,1536,1,0,0.8191
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,3072,1,0,2.1945
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,512,1,0,67.0856
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,6144,1,0,5.1082
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,8192,1,0,6.9616
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,12288,1,0,11.1458
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,16384,1,0,15.2897
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,256,1,0,0.1700
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,32,1,0,0.1167
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,10240,1,0,9.0075
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,64,1,0,0.1302
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,32768,1,0,35.1368
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,1,1,0,0.1074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,256,1,0,0.2338
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,512,1,0,0.4122
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,1024,1,0,0.8942
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,2048,1,0,2.4436
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,16,1,0,0.1100
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,6144,1,0,10.0612
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,3072,1,0,4.2833
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,1536,1,0,1.5873
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,4096,1,0,6.1627
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,8192,1,0,13.9120
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,128,1,0,0.1603
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,1,1,0,0.1091
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,12288,1,0,22.3425
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,64,1,0,0.1600
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,128,1,0,0.2177
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,16,1,0,0.1169
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,32,1,0,0.1298
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,16384,1,0,31.2409
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,512,1,0,0.7174
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,256,1,0,0.3650
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,10240,1,0,18.0474
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,32768,1,0,70.1808
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,3072,1,0,8.5524
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,2048,1,0,4.6740
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,1536,1,0,3.0754
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,6144,1,0,20.3155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,8192,1,0,28.6483
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,1024,1,0,1.7282
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,10240,1,0,36.7865
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,1,1,0,0.1081
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,12288,1,0,45.3494
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,4096,1,0,12.2836
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,16,1,0,0.1299
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,32,1,0,0.1596
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,64,1,0,0.2184
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,16384,1,0,63.2569
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,256,1,0,0.6231
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,512,1,0,1.3815
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,128,1,0,0.3405
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,2048,1,0,9.5062
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,1536,1,0,6.1155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,1024,1,0,3.2954
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,3072,1,0,17.3219
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,4096,1,0,25.1520
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,32768,1,0,142.0462
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,1,1,0,0.1090
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,6144,1,0,41.3684
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,16,1,0,0.1616
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,32,1,0,0.2176
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,8192,1,0,57.5606
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,64,1,0,0.3395
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,128,1,0,0.5789
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,256,1,0,1.1755
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,10240,1,0,73.6643
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,512,1,0,2.5985
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,1024,1,0,6.5298
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,1536,1,0,12.2933
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,12288,1,0,91.2342
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,2048,1,0,19.5253
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,16384,1,0,125.8988
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,16,1,0,0.2185
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,3072,1,0,35.0617
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,1,1,0,0.1111
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,32,1,0,0.3435
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,64,1,0,0.5737
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,4096,1,0,50.7213
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,128,1,0,1.0854
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,256,1,0,2.1723
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,512,1,0,5.1857
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,1024,1,0,13.4855
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,6144,1,0,82.5957
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,1,1,0,0.1197
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,1536,1,0,24.9245
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,16,1,0,0.3433
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,2048,1,0,38.9688
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,8192,1,0,115.9757
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,32,1,0,0.5768
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,64,1,0,1.0803
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,128,1,0,1.9813
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,256,1,0,4.2592
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,3072,1,0,70.2625
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,512,1,0,10.7817
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,1024,1,0,27.0093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,16,1,0,0.5819
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,1,1,0,0.1371
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,64,1,0,1.9872
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,4096,1,0,101.9372
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,128,1,0,3.8489
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,1536,1,0,49.8755
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,32,1,0,1.0875
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,256,1,0,8.7860
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,1,1,0,0.1753
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,2048,1,0,78.1465
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,512,1,0,21.5920
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,32,1,0,1.9915
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,16,1,0,1.0868
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,256,1,0,17.5391
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,16,1,0,0.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,128,1,0,8.0379
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,64,1,0,3.8867
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,1024,1,0,54.0732
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,1,1,0,0.0968
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,32,1,0,0.1071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,64,1,0,0.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,512,1,0,43.6318
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,128,1,0,0.1197
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,512,1,0,0.2131
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,1536,1,0,0.5739
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,1024,1,0,0.3749
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,2048,1,0,0.8199
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,256,1,0,0.1444
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,4096,1,0,1.9742
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,10240,1,0,5.6051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,3072,1,0,1.3961
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,8192,1,0,4.3310
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,1,1,0,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,16384,1,0,9.6614
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,32,1,0,0.1101
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,12288,1,0,6.9645
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,16,1,0,0.1072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,32768,1,0,23.4776
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,6144,1,0,3.1243
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,64,1,0,0.1200
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,512,1,0,0.3271
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,256,1,0,0.1981
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,128,1,0,0.1418
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,3072,1,0,2.6781
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,1024,1,0,0.6400
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,6144,1,0,6.1325
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,1536,1,0,1.0469
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,4096,1,0,3.7518
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,8192,1,0,8.4887
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,16384,1,0,19.7667
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,12288,1,0,13.9235
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,16,1,0,0.1113
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,2048,1,0,1.5668
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,10240,1,0,11.2863
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,1,1,0,0.1057
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,32,1,0,0.1195
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,256,1,0,0.2992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,32768,1,0,47.0374
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,64,1,0,0.1423
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,512,1,0,0.5482
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,128,1,0,0.1891
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,1024,1,0,1.2029
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,4096,1,0,7.4341
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,8192,1,0,17.5272
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,1536,1,0,1.9969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,10240,1,0,22.9535
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,2048,1,0,2.9347
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,12288,1,0,28.2041
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,6144,1,0,12.3792
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,3072,1,0,5.2072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,1,1,0,0.1051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,16,1,0,0.1195
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,128,1,0,0.2873
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,16384,1,0,39.8557
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,256,1,0,0.4975
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,64,1,0,0.1912
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,32,1,0,0.1421
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,512,1,0,1.0063
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,2048,1,0,5.8390
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,1024,1,0,2.2233
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,4096,1,0,15.2050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,3072,1,0,10.5956
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,1536,1,0,3.8933
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,32768,1,0,95.2491
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,8192,1,0,35.1531
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,1,1,0,0.1050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,32,1,0,0.1911
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,6144,1,0,25.2649
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,64,1,0,0.2869
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,10240,1,0,45.7068
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,16,1,0,0.1440
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,128,1,0,0.4733
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,12288,1,0,56.2321
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,256,1,0,0.9232
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,512,1,0,1.8770
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,1536,1,0,7.9105
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,2048,1,0,12.0919
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,16384,1,0,80.3360
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,3072,1,0,21.4378
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,1024,1,0,4.3776
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,16,1,0,0.1929
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,1,1,0,0.1074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,4096,1,0,30.9445
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,64,1,0,0.4729
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,6144,1,0,50.8742
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,128,1,0,0.8675
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,32,1,0,0.2872
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,256,1,0,1.6657
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,1024,1,0,9.0312
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,8192,1,0,71.3487
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,2048,1,0,24.4303
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,512,1,0,3.6872
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,1,1,0,0.1129
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,16,1,0,0.2879
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,3072,1,0,43.3609
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,1536,1,0,16.1408
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,32,1,0,0.4762
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,64,1,0,0.8669
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,256,1,0,3.2271
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,128,1,0,1.5763
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,4096,1,0,62.4897
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,512,1,0,7.6436
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,64,1,0,1.5691
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,1024,1,0,18.3299
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,1536,1,0,32.5050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,128,1,0,3.0379
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,1,1,0,0.1219
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,16,1,0,0.4745
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,32,1,0,0.8728
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,2048,1,0,49.3467
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,256,1,0,6.6872
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,16,1,0,0.8714
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,32,1,0,1.5699
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,512,1,0,15.6412
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,1,1,0,0.1489
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,1024,1,0,37.0317
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,256,1,0,13.7130
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,128,1,0,6.3205
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,16,1,0,0.1034
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,512,1,0,31.0996
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,64,1,0,3.0593
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,1,1,0,0.0911
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,32,1,0,0.1047
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,64,1,0,0.1082
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,128,1,0,0.1161
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,512,1,0,0.2018
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,256,1,0,0.1402
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,1536,1,0,0.5374
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,1024,1,0,0.3487
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,4096,1,0,1.7992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,10240,1,0,5.1319
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,8192,1,0,3.9414
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,2048,1,0,0.7570
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,6144,1,0,2.8721
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,1,1,0,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,32,1,0,0.1091
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,16384,1,0,8.8457
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,16,1,0,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,12288,1,0,6.3069
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,256,1,0,0.1901
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,3072,1,0,1.2815
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,32768,1,0,21.5865
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,1024,1,0,0.5933
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,128,1,0,0.1364
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,1536,1,0,0.9734
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,2048,1,0,1.4260
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,64,1,0,0.1155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,512,1,0,0.3089
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,4096,1,0,3.4277
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,12288,1,0,12.6905
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,16384,1,0,18.0661
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,6144,1,0,5.5650
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,8192,1,0,7.8106
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,3072,1,0,2.4030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,10240,1,0,10.2674
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,32,1,0,0.1156
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,32768,1,0,43.2851
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,16,1,0,0.1092
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,64,1,0,0.1381
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,1,1,0,0.1018
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,1024,1,0,1.1110
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,1536,1,0,1.8253
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,256,1,0,0.2889
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,2048,1,0,2.6715
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,128,1,0,0.1848
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,4096,1,0,6.8075
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,512,1,0,0.5117
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,8192,1,0,15.7745
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,3072,1,0,4.7448
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,1,1,0,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,12288,1,0,25.4274
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,6144,1,0,11.2122
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,16,1,0,0.1175
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,16384,1,0,36.7163
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,10240,1,0,20.5128
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,32,1,0,0.1360
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,256,1,0,0.4711
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,512,1,0,0.9472
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,128,1,0,0.2763
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,64,1,0,0.1851
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,3072,1,0,9.4944
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,1536,1,0,3.5475
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,1024,1,0,2.0637
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,4096,1,0,13.8406
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,2048,1,0,5.3292
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,6144,1,0,22.8831
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,32768,1,0,87.8712
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,1,1,0,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,32,1,0,0.1859
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,8192,1,0,32.3050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,16,1,0,0.1365
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,10240,1,0,41.8573
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,64,1,0,0.2753
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,128,1,0,0.4488
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,256,1,0,0.8661
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,12288,1,0,51.3289
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,1024,1,0,4.0321
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,512,1,0,1.7287
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,2048,1,0,11.0224
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,16384,1,0,74.0463
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,1536,1,0,7.2209
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,3072,1,0,19.4042
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,16,1,0,0.1819
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,4096,1,0,28.3880
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,32,1,0,0.2752
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,128,1,0,0.8260
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,64,1,0,0.4499
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,1,1,0,0.1034
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,6144,1,0,45.9148
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,256,1,0,1.5751
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,1024,1,0,8.4819
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,8192,1,0,64.6751
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,1536,1,0,14.6864
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,1,1,0,0.1074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,2048,1,0,22.3685
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,512,1,0,3.4307
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,16,1,0,0.2746
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,32,1,0,0.4494
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,3072,1,0,39.3815
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,64,1,0,0.8262
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,512,1,0,7.1808
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,128,1,0,1.4828
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,1024,1,0,17.1776
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,4096,1,0,56.7262
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,256,1,0,3.0244
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,1,1,0,0.1188
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,1536,1,0,29.4639
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,32,1,0,0.8333
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,16,1,0,0.4502
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,64,1,0,1.4886
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,2048,1,0,44.8937
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,256,1,0,6.3095
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,128,1,0,2.8776
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,1,1,0,0.1416
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,512,1,0,14.4236
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,32,1,0,1.4764
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,16,1,0,0.8366
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,1024,1,0,33.9420
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,128,1,0,5.9802
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,64,1,0,2.8567
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,256,1,0,12.9269
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,16,1,0,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,128,1,0,0.1153
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,1,1,0,0.0915
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,256,1,0,0.1399
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,512,1,0,28.9045
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,512,1,0,0.1994
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,32,1,0,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,1024,1,0,0.3487
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,1536,1,0,0.5344
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,2048,1,0,0.7519
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,6144,1,0,2.8262
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,4096,1,0,1.7689
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,8192,1,0,3.9367
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,64,1,0,0.1070
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,10240,1,0,5.0948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,3072,1,0,1.2697
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,1,1,0,0.1045
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,32768,1,0,21.5411
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,16,1,0,0.1014
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,16384,1,0,8.8243
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,64,1,0,0.1156
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,32,1,0,0.1061
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,128,1,0,0.1356
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,512,1,0,0.3051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,1024,1,0,0.5851
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,1536,1,0,0.9665
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,3072,1,0,2.3907
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,2048,1,0,1.4103
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,12288,1,0,6.2490
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,4096,1,0,3.3605
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,10240,1,0,10.2005
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,8192,1,0,7.6640
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,6144,1,0,5.5130
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,256,1,0,0.1896
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,12288,1,0,12.5117
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,16,1,0,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,64,1,0,0.1360
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,128,1,0,0.1844
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,32768,1,0,43.5459
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,16384,1,0,17.9091
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,32,1,0,0.1152
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,1,1,0,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,512,1,0,0.5089
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,1024,1,0,1.0976
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,2048,1,0,2.6496
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,1536,1,0,1.8002
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,3072,1,0,4.6473
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,8192,1,0,15.8157
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,6144,1,0,11.0346
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,4096,1,0,6.6917
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,12288,1,0,25.5552
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,256,1,0,0.2856
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,10240,1,0,20.5983
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,16,1,0,0.1156
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,32,1,0,0.1368
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,64,1,0,0.1834
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,128,1,0,0.2717
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,1,1,0,0.1013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,16384,1,0,36.0409
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,256,1,0,0.4683
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,512,1,0,0.9366
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,1536,1,0,3.5110
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,1024,1,0,2.0081
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,32768,1,0,87.3176
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,3072,1,0,9.3833
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,2048,1,0,5.2012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,4096,1,0,13.6529
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,1,1,0,0.1029
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,8192,1,0,32.1614
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,10240,1,0,41.2836
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,16,1,0,0.1362
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,64,1,0,0.2708
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,6144,1,0,22.7992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,128,1,0,0.4453
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,32,1,0,0.1828
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,12288,1,0,51.2464
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,256,1,0,0.8508
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,512,1,0,1.7098
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,1536,1,0,7.2150
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,2048,1,0,11.0164
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,1024,1,0,3.9583
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,16384,1,0,73.3461
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,4096,1,0,27.7860
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,1,1,0,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,32,1,0,0.2695
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,16,1,0,0.1810
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,3072,1,0,19.2914
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,128,1,0,0.8099
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,6144,1,0,45.6775
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,64,1,0,0.4410
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,256,1,0,1.5378
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,512,1,0,3.3411
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,8192,1,0,63.8252
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,1536,1,0,14.7209
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,1024,1,0,8.1873
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,1,1,0,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,2048,1,0,22.0955
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,16,1,0,0.2708
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,64,1,0,0.8170
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,128,1,0,1.4428
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,3072,1,0,38.9420
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,256,1,0,2.9425
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,512,1,0,7.0707
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,32,1,0,0.4415
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,4096,1,0,56.2121
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,1024,1,0,16.8514
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,16,1,0,0.4434
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,64,1,0,1.4423
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,32,1,0,0.8164
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,1,1,0,0.1155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,1536,1,0,29.5836
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,128,1,0,2.7782
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,2048,1,0,44.0296
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,256,1,0,6.2441
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,512,1,0,14.3095
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,32,1,0,1.4586
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,1024,1,0,33.3474
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,128,1,0,5.8370
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,64,1,0,2.7724
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,1,1,0,0.1380
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,16,1,0,0.8098
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,64,1,0,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,128,1,0,0.1144
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,32,1,0,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,512,1,0,28.6149
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,1,1,0,0.0888
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,256,1,0,0.1382
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,256,1,0,12.6420
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,512,1,0,0.1992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,16,1,0,0.1008
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,1024,1,0,0.3482
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,2048,1,0,0.7496
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,3072,1,0,1.2582
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,6144,1,0,2.8004
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,10240,1,0,5.0397
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,4096,1,0,1.7829
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,16384,1,0,8.7651
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,12288,1,0,6.2805
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,8192,1,0,3.8830
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,1536,1,0,0.5231
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,32,1,0,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,1,1,0,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,16,1,0,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,64,1,0,0.1150
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,1536,1,0,0.9529
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,32768,1,0,21.4649
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,128,1,0,0.1364
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,1024,1,0,0.5845
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,2048,1,0,1.4059
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,3072,1,0,2.4019
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,4096,1,0,3.3390
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,256,1,0,0.1890
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,6144,1,0,5.4784
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,12288,1,0,12.5042
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,8192,1,0,7.7196
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,1,1,0,0.1026
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,16384,1,0,17.8591
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,512,1,0,0.3033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,10240,1,0,10.2047
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,64,1,0,0.1361
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,16,1,0,0.1050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,256,1,0,0.2829
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,512,1,0,0.5007
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,128,1,0,0.1798
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,1024,1,0,1.0985
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,32768,1,0,43.0532
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,32,1,0,0.1153
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,2048,1,0,2.6441
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,1536,1,0,1.8073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,4096,1,0,6.6717
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,8192,1,0,15.5796
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,6144,1,0,11.0121
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,12288,1,0,25.3004
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,10240,1,0,20.4935
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,3072,1,0,4.6637
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,1,1,0,0.0990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,16,1,0,0.1153
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,32,1,0,0.1359
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,128,1,0,0.2763
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,512,1,0,0.9280
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,64,1,0,0.1795
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,16384,1,0,35.8932
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,256,1,0,0.4637
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,1536,1,0,3.4529
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,1024,1,0,2.0051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,2048,1,0,5.2088
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,32768,1,0,87.0944
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,3072,1,0,9.4646
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,4096,1,0,13.5440
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,1,1,0,0.1019
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,10240,1,0,41.0162
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,32,1,0,0.1810
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,16,1,0,0.1360
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,6144,1,0,22.2904
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,12288,1,0,50.8062
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,64,1,0,0.2707
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,8192,1,0,32.0484
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,128,1,0,0.4376
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,256,1,0,0.8451
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,512,1,0,1.6909
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,16384,1,0,72.2668
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,1024,1,0,3.9419
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,1536,1,0,7.1121
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,2048,1,0,10.8831
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,16,1,0,0.1811
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,1,1,0,0.1013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,3072,1,0,19.3786
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,32,1,0,0.2691
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,6144,1,0,45.2423
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,64,1,0,0.4393
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,4096,1,0,27.7164
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,128,1,0,0.8068
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,1024,1,0,8.1536
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,512,1,0,3.3439
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,256,1,0,1.5143
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,16,1,0,0.2694
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,8192,1,0,63.9904
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,2048,1,0,21.9416
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,1536,1,0,14.3895
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,32,1,0,0.4380
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,3072,1,0,38.3787
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,1,1,0,0.1055
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,64,1,0,0.8016
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,256,1,0,2.9458
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,4096,1,0,56.2665
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,512,1,0,6.8775
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,128,1,0,1.4358
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,1024,1,0,16.5850
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,1,1,0,0.1152
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,16,1,0,0.4412
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,32,1,0,0.8068
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,1536,1,0,28.7663
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,64,1,0,1.4257
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,512,1,0,14.1768
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,128,1,0,2.7681
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,2048,1,0,44.0121
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,256,1,0,6.0923
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,1,1,0,0.1408
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,16,1,0,0.8011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,32,1,0,1.4261
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,1024,1,0,33.4333
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,64,1,0,2.7459
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,128,1,0,5.8126
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,16,1,0,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,32,1,0,0.1029
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,1,1,0,0.0889
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,256,1,0,0.1378
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,512,1,0,28.6189
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,128,1,0,0.1131
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,256,1,0,12.3284
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,512,1,0,0.1964
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,1024,1,0,0.3425
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,64,1,0,0.1050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,3072,1,0,1.2635
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,1536,1,0,0.5284
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,4096,1,0,1.7841
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,12288,1,0,6.2413
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,8192,1,0,3.8400
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,6144,1,0,2.8148
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,16384,1,0,8.8004
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,10240,1,0,5.0140
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,2048,1,0,0.7434
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,1,1,0,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,32768,1,0,21.4565
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,16,1,0,0.1013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,256,1,0,0.1846
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,128,1,0,0.1339
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,512,1,0,0.3023
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,1024,1,0,0.5848
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,64,1,0,0.1135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,3072,1,0,2.3672
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,32,1,0,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,6144,1,0,5.4568
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,10240,1,0,10.0714
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,2048,1,0,1.4084
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,8192,1,0,7.6873
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,16384,1,0,17.7334
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,1,1,0,0.0975
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,12288,1,0,12.4519
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,32,1,0,0.1133
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,1536,1,0,0.9584
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,16,1,0,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,128,1,0,0.1787
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,32768,1,0,43.1637
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,256,1,0,0.2805
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,4096,1,0,3.3556
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,1024,1,0,1.0828
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,64,1,0,0.1338
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,512,1,0,0.5012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,1536,1,0,1.7976
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,4096,1,0,6.5568
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,3072,1,0,4.6306
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,6144,1,0,11.1413
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,2048,1,0,2.6416
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,8192,1,0,15.6005
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,1,1,0,0.1009
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,10240,1,0,20.4199
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,32,1,0,0.1340
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,16,1,0,0.1136
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,12288,1,0,25.4058
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,256,1,0,0.4594
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,128,1,0,0.2683
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,64,1,0,0.1833
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,512,1,0,0.9272
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,16384,1,0,36.0046
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,1536,1,0,3.4975
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,2048,1,0,5.1813
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,1024,1,0,1.9996
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,32768,1,0,86.2661
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,4096,1,0,13.6496
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,6144,1,0,22.6589
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,8192,1,0,31.4452
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,3072,1,0,9.3461
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,1,1,0,0.1030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,16,1,0,0.1340
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,10240,1,0,41.2676
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,32,1,0,0.1800
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,64,1,0,0.2702
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,12288,1,0,50.4523
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,256,1,0,0.8403
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,512,1,0,1.6762
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,128,1,0,0.4376
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,1536,1,0,7.0996
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,16384,1,0,72.4280
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,2048,1,0,10.7934
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,4096,1,0,27.4665
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,3072,1,0,18.9838
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,1,1,0,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,1024,1,0,3.9361
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,16,1,0,0.1788
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,32,1,0,0.2695
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,64,1,0,0.4353
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,6144,1,0,45.4858
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,256,1,0,1.5164
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,128,1,0,0.7976
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,8192,1,0,63.0968
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,1536,1,0,14.4357
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,1024,1,0,8.2239
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,1,1,0,0.1051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,512,1,0,3.2860
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,32,1,0,0.4368
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,2048,1,0,21.7539
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,3072,1,0,38.6533
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,128,1,0,1.4344
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,16,1,0,0.2686
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,64,1,0,0.7976
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,256,1,0,2.8998
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,4096,1,0,56.1549
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,1024,1,0,16.4567
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,16,1,0,0.4366
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,32,1,0,0.7973
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,1536,1,0,29.1308
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,512,1,0,6.9282
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,64,1,0,1.4312
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,256,1,0,6.0766
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,2048,1,0,44.2212
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,1,1,0,0.1159
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,512,1,0,14.0429
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,1,1,0,0.1360
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,128,1,0,2.7329
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,32,1,0,1.4225
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,1024,1,0,32.9761
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,128,1,0,5.7877
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,64,1,0,2.7649
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,256,1,0,12.2499
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,16,1,0,0.8043
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,512,1,0,28.2120
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1,64,1,0,0.1596
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1,1,1,0,0.1401
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1,32,1,0,0.1551
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1,256,1,0,0.2157
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1,16,1,0,0.1524
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1,1536,1,0,0.8022
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1,2048,1,0,1.0883
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1,1024,1,0,0.5497
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1,128,1,0,0.1764
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1,6144,1,0,3.5685
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1,512,1,0,0.3074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1,3072,1,0,1.6948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1,10240,1,0,6.2173
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1,8192,1,0,4.8236
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1,16384,1,0,10.5863
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1,12288,1,0,7.6108
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,2,1,1,0,0.1475
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,2,32,1,0,0.1597
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1,4096,1,0,2.3228
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1,32768,1,0,24.9090
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,2,64,1,0,0.1773
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,2,256,1,0,0.3016
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,2,128,1,0,0.2154
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,2,512,1,0,0.5239
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,2,1024,1,0,0.9898
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,2,2048,1,0,2.0558
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,2,1536,1,0,1.4852
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,2,16,1,0,0.1544
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,2,6144,1,0,6.9231
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,2,3072,1,0,3.2319
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,2,4096,1,0,4.4201
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,2,12288,1,0,15.1468
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,4,1,1,0,0.1536
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,2,8192,1,0,9.5006
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,2,16384,1,0,21.2907
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,4,16,1,0,0.1603
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,2,10240,1,0,12.4287
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,4,32,1,0,0.1765
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,4,64,1,0,0.2143
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,4,128,1,0,0.2987
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,4,512,1,0,0.9368
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,4,1024,1,0,1.8580
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,4,256,1,0,0.5091
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,2,32768,1,0,49.9583
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,4,3072,1,0,6.2688
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,4,2048,1,0,3.8898
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,4,1536,1,0,2.8384
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,4,6144,1,0,13.7765
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,4,4096,1,0,8.6754
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,4,8192,1,0,19.2263
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,8,1,1,0,0.1489
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,4,10240,1,0,24.8061
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,8,16,1,0,0.1769
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,8,64,1,0,0.2975
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,8,32,1,0,0.2157
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,4,16384,1,0,42.7900
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,8,128,1,0,0.5064
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,4,12288,1,0,30.5056
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,8,512,1,0,1.7559
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,8,256,1,0,0.9097
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,8,1536,1,0,5.4703
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,8,1024,1,0,3.4799
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,8,3072,1,0,12.5271
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,4,32768,1,0,100.1353
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,8,2048,1,0,7.5916
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,8,4096,1,0,17.4893
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,16,16,1,0,0.2152
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,8,8192,1,0,38.5431
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,16,32,1,0,0.2999
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,8,6144,1,0,27.8415
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,16,1,1,0,0.1707
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,16,64,1,0,0.5059
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,8,12288,1,0,60.8814
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,8,10240,1,0,49.5758
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,16,128,1,0,0.9030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,16,256,1,0,1.7028
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,16,512,1,0,3.2830
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,16,1024,1,0,6.8393
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,16,1536,1,0,10.8535
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,8,16384,1,0,86.0557
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,32,16,1,0,0.3007
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,16,4096,1,0,35.1689
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,16,2048,1,0,15.3922
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,16,3072,1,0,25.3045
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,32,1,1,0,0.1585
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,32,32,1,0,0.5060
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,16,6144,1,0,55.6448
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,32,64,1,0,0.9040
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,32,256,1,0,3.1855
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,32,128,1,0,1.6887
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,32,512,1,0,6.4363
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,16,8192,1,0,77.1098
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,32,1024,1,0,13.8942
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,64,16,1,0,0.5157
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,32,2048,1,0,30.8827
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,64,1,1,0,0.1721
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,32,1536,1,0,22.1220
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,64,32,1,0,0.9027
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,64,128,1,0,3.1773
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,32,3072,1,0,50.3034
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,64,64,1,0,1.6947
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,64,256,1,0,6.2174
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,64,1024,1,0,27.9637
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,64,512,1,0,13.0679
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,128,1,1,0,0.2021
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,32,4096,1,0,70.6092
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,128,16,1,0,0.9183
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,128,32,1,0,1.6887
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,128,64,1,0,3.1628
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,128,128,1,0,6.1961
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,64,1536,1,0,44.0133
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,128,256,1,0,12.6520
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,64,2048,1,0,62.0938
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,256,16,1,0,1.7162
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,256,1,1,0,0.2671
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,128,512,1,0,26.3552
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,256,32,1,0,3.1741
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,256,64,1,0,6.1707
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1,1,1,0,0.1150
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1,16,1,0,0.1248
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,128,1024,1,0,55.9467
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,256,256,1,0,25.6529
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1,64,1,0,0.1356
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1,128,1,0,0.1527
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,256,128,1,0,12.5679
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1,512,1,0,0.2908
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,256,512,1,0,52.5400
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1,256,1,0,0.1908
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1,1024,1,0,0.5595
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1,1536,1,0,0.9234
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1,2048,1,0,1.3689
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1,3072,1,0,2.3352
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1,4096,1,0,3.3408
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1,8192,1,0,7.3128
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1,6144,1,0,5.3001
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1,12288,1,0,11.5466
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1,10240,1,0,9.4250
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,2,1,1,0,0.1177
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1,16384,1,0,15.9585
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,2,16,1,0,0.1285
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,2,32,1,0,0.1384
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,2,128,1,0,0.1819
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,2,64,1,0,0.1506
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1,32768,1,0,36.1983
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,2,1024,1,0,1.0288
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,2,256,1,0,0.2685
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,2,512,1,0,0.4926
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,2,1536,1,0,1.7561
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,2,2048,1,0,2.6511
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,2,3072,1,0,4.5624
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,2,4096,1,0,6.4603
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,2,6144,1,0,10.4842
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,2,8192,1,0,14.4966
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,4,1,1,0,0.1195
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,2,12288,1,0,23.1168
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,2,10240,1,0,18.8294
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,4,16,1,0,0.1354
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,4,32,1,0,0.1525
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,2,16384,1,0,32.1508
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,4,64,1,0,0.1816
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,4,128,1,0,0.2501
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,4,256,1,0,0.4421
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,4,512,1,0,0.8958
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,4,1024,1,0,1.9775
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,4,1536,1,0,3.3848
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,2,32768,1,0,72.6162
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,4,3072,1,0,8.9673
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,4,2048,1,0,5.1038
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,4,6144,1,0,21.0171
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,4,8192,1,0,29.2284
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,4,4096,1,0,12.7980
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,8,1,1,0,0.1190
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,4,10240,1,0,37.7784
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,8,16,1,0,0.1523
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,4,12288,1,0,46.3988
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,8,64,1,0,0.2472
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,8,128,1,0,0.4021
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,8,256,1,0,0.7945
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,8,512,1,0,1.6931
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,4,16384,1,0,64.4839
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,8,1024,1,0,3.7573
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,8,2048,1,0,10.0637
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,8,1536,1,0,6.6220
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,8,3072,1,0,18.0179
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,8,4096,1,0,25.8935
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,4,32768,1,0,144.9001
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,16,1,1,0,0.1236
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,8,6144,1,0,42.1064
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,8,8192,1,0,58.6604
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,16,16,1,0,0.1843
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,16,32,1,0,0.2538
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,16,64,1,0,0.4023
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,8,10240,1,0,75.3685
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,16,128,1,0,0.7130
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,16,256,1,0,1.4786
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,16,512,1,0,3.2096
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,8,12288,1,0,92.3719
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,16,1536,1,0,13.2748
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,16,1024,1,0,7.3803
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,16,2048,1,0,20.4125
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,8,16384,1,0,129.1480
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,16,3072,1,0,36.0928
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,32,1,1,0,0.1306
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,32,16,1,0,0.2507
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,32,32,1,0,0.4044
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,16,4096,1,0,51.9718
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,32,64,1,0,0.7136
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,32,128,1,0,1.3424
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,32,256,1,0,2.7535
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,32,512,1,0,6.3370
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,16,6144,1,0,84.0448
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,32,1024,1,0,15.0319
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,64,1,1,0,0.1404
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,32,1536,1,0,26.7511
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,64,16,1,0,0.4045
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,16,8192,1,0,117.1752
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,32,2048,1,0,41.0018
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,8,32,1,0,0.1814
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,64,32,1,0,0.7169
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,64,256,1,0,5.3680
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,64,64,1,0,1.3391
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,32,3072,1,0,72.1181
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,64,512,1,0,12.7425
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,64,128,1,0,2.4859
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,128,1,1,0,0.1644
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,64,1024,1,0,30.3198
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,32,4096,1,0,103.7708
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,128,16,1,0,0.7210
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,128,64,1,0,2.4897
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,128,32,1,0,1.3329
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,128,128,1,0,4.8267
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,64,1536,1,0,53.8729
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,128,256,1,0,11.0707
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,256,1,1,0,0.2090
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,128,512,1,0,25.5008
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,256,16,1,0,1.3465
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,64,2048,1,0,81.7525
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,256,64,1,0,4.8652
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,256,128,1,0,9.9755
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,256,32,1,0,2.4606
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1,1,1,0,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,256,256,1,0,22.1376
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1,16,1,0,0.1112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,128,1024,1,0,60.2177
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1,64,1,0,0.1173
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1,128,1,0,0.1306
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1,256,1,0,0.1620
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1,512,1,0,0.2281
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1,1536,1,0,0.6116
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,256,512,1,0,51.5226
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1,1024,1,0,0.3944
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1,32,1,0,0.1144
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1,3072,1,0,1.4640
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1,2048,1,0,0.8710
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1,10240,1,0,5.8349
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1,4096,1,0,2.0605
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1,12288,1,0,7.1619
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,2,1,1,0,0.1093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1,16384,1,0,10.0755
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1,8192,1,0,4.4885
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,2,32,1,0,0.1195
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,2,16,1,0,0.1139
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,2,128,1,0,0.1542
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,2,512,1,0,0.3583
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1,32768,1,0,24.1248
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1,6144,1,0,3.2602
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,2,256,1,0,0.2155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,2,1024,1,0,0.6947
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,2,1536,1,0,1.1385
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,2,4096,1,0,3.9516
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,2,6144,1,0,6.3595
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,2,3072,1,0,2.8116
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,2,64,1,0,0.1300
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,2,2048,1,0,1.6662
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,2,16384,1,0,20.3827
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,4,16,1,0,0.1189
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,2,12288,1,0,14.3964
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,4,32,1,0,0.1298
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,2,8192,1,0,8.8937
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,4,1,1,0,0.1093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,2,10240,1,0,11.6534
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,2,32768,1,0,48.2982
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,4,128,1,0,0.2065
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,4,256,1,0,0.3358
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,4,64,1,0,0.1536
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,4,1024,1,0,1.3308
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,4,512,1,0,0.6258
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,4,3072,1,0,5.4565
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,4,4096,1,0,7.7771
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,4,1536,1,0,2.1807
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,4,6144,1,0,12.8066
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,4,10240,1,0,23.5283
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,4,2048,1,0,3.1550
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,8,1,1,0,0.1097
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,4,16384,1,0,40.8415
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,8,64,1,0,0.2054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,8,128,1,0,0.3169
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,4,8192,1,0,18.0749
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,4,12288,1,0,28.9619
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,8,32,1,0,0.1551
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,8,16,1,0,0.1318
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,8,512,1,0,1.1733
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,8,256,1,0,0.5768
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,8,1024,1,0,2.4844
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,8,3072,1,0,11.0416
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,4,32768,1,0,96.6768
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,8,1536,1,0,4.2074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,8,4096,1,0,15.8183
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,8,6144,1,0,25.8221
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,16,1,1,0,0.1114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,16,16,1,0,0.1560
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,8,8192,1,0,36.1687
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,8,10240,1,0,46.8342
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,16,32,1,0,0.2061
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,16,64,1,0,0.3163
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,16,128,1,0,0.5364
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,16,256,1,0,1.0750
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,8,12288,1,0,57.7687
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,16,1024,1,0,4.8605
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,16,512,1,0,2.1678
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,8,16384,1,0,81.9078
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,16,1536,1,0,8.4772
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,16,2048,1,0,12.6597
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,32,1,1,0,0.1135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,32,16,1,0,0.2083
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,16,3072,1,0,22.1944
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,32,32,1,0,0.3157
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,16,4096,1,0,31.9703
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,32,64,1,0,0.5412
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,32,128,1,0,1.0023
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,32,256,1,0,1.9710
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,32,512,1,0,4.2382
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,16,8192,1,0,72.3055
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,16,6144,1,0,51.6312
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,32,1024,1,0,9.9983
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,32,1536,1,0,17.2473
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,32,2048,1,0,25.5535
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,64,32,1,0,0.5381
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,64,1,1,0,0.1214
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,64,16,1,0,0.3183
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,64,64,1,0,1.0079
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,32,3072,1,0,44.1672
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,64,128,1,0,1.8276
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,64,256,1,0,3.8324
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,64,512,1,0,8.7178
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,32,4096,1,0,63.4831
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,128,1,1,0,0.1361
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,64,1024,1,0,20.1684
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,128,16,1,0,0.5417
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,64,1536,1,0,34.4256
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,128,32,1,0,0.9968
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,128,128,1,0,3.5350
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,128,64,1,0,1.8233
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,64,2048,1,0,50.9768
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,128,256,1,0,7.9539
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,128,512,1,0,17.7299
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,256,16,1,0,1.0088
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,256,1,1,0,0.1692
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,256,64,1,0,3.5270
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,128,1024,1,0,40.2264
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,256,128,1,0,7.4026
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1,1,1,0,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1,16,1,0,0.1092
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,256,256,1,0,15.8862
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,256,512,1,0,35.3290
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1,64,1,0,0.1112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1,128,1,0,0.1196
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1,256,1,0,0.1435
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1,32,1,0,0.1092
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1,2048,1,0,0.6300
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1,1536,1,0,0.4635
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1,4096,1,0,1.4258
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1,512,1,0,0.1979
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1,1024,1,0,0.3176
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1,3072,1,0,1.0316
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1,12288,1,0,4.9820
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1,8192,1,0,3.0828
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1,6144,1,0,2.2402
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1,16384,1,0,7.0752
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,2,16,1,0,0.1093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,2,1,1,0,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1,32768,1,0,18.0486
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1,10240,1,0,4.0411
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,2,32,1,0,0.1114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,2,64,1,0,0.1214
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,2,512,1,0,0.2957
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,2,256,1,0,0.1910
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,2,128,1,0,0.1413
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,2,2048,1,0,1.1775
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,2,3072,1,0,1.9235
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,2,1024,1,0,0.5338
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,2,1536,1,0,0.8337
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,2,4096,1,0,2.6843
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,2,10240,1,0,8.1078
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,2,12288,1,0,10.0172
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,2,8192,1,0,6.0382
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,2,6144,1,0,4.3246
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,2,16384,1,0,14.4483
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,4,1,1,0,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,4,16,1,0,0.1112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,4,32,1,0,0.1213
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,4,128,1,0,0.1844
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,4,64,1,0,0.1417
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,4,1536,1,0,1.5596
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,4,256,1,0,0.2858
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,2,32768,1,0,36.1429
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,4,1024,1,0,0.9988
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,4,3072,1,0,3.7026
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,256,32,1,0,1.8263
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,4,2048,1,0,2.1862
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,4,512,1,0,0.4976
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,4,4096,1,0,5.2440
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,4,6144,1,0,8.7142
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,4,10240,1,0,16.3307
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,4,12288,1,0,20.1856
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,8,1,1,0,0.1051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,8,16,1,0,0.1223
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,8,64,1,0,0.1837
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,4,8192,1,0,12.3640
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,8,128,1,0,0.2748
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,8,32,1,0,0.1401
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,8,256,1,0,0.4744
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,8,512,1,0,0.9143
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,8,1536,1,0,2.9790
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,4,32768,1,0,72.4597
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,8,3072,1,0,7.4829
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,8,1024,1,0,1.8308
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,8,4096,1,0,10.8351
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,8,6144,1,0,17.6582
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,16,1,1,0,0.1075
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,16,16,1,0,0.1402
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,8,10240,1,0,32.4622
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,8,12288,1,0,40.1287
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,16,32,1,0,0.1858
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,16,64,1,0,0.2751
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,16,128,1,0,0.4534
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,8,16384,1,0,58.2061
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,16,256,1,0,0.8659
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,16,512,1,0,1.6857
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,16,1536,1,0,6.0596
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,16,1024,1,0,3.5512
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,16,2048,1,0,8.8560
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,16,3072,1,0,15.1914
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,32,1,1,0,0.1096
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,32,16,1,0,0.1851
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,16,6144,1,0,35.3641
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,32,32,1,0,0.2745
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,32,64,1,0,0.4515
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,16,8192,1,0,49.9923
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,32,128,1,0,0.8325
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,32,256,1,0,1.5634
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,32,512,1,0,3.2721
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,32,1536,1,0,12.3521
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,32,1024,1,0,7.3604
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,64,1,1,0,0.1168
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,64,16,1,0,0.2770
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,32,4096,1,0,43.6070
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,64,32,1,0,0.4527
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,64,64,1,0,0.8339
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,64,256,1,0,3.0575
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,64,512,1,0,6.7592
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,64,1536,1,0,24.7636
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,64,2048,1,0,35.4364
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,128,16,1,0,0.4577
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,128,32,1,0,0.8311
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,128,64,1,0,1.5023
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,128,128,1,0,2.8809
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,128,256,1,0,6.3476
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,128,512,1,0,13.6790
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,256,1,1,0,0.1475
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,256,16,1,0,0.8334
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,256,32,1,0,1.5009
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,256,64,1,0,2.8829
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,256,128,1,0,6.0723
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,256,256,1,0,12.7286
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,256,512,1,0,27.5332
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1,16,1,0,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1,32,1,0,0.1070
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,64,128,1,0,1.4895
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1,64,1,0,0.1105
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1,128,1,0,0.1176
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1,256,1,0,0.1390
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1,512,1,0,0.1907
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1,1024,1,0,0.3035
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,128,1,1,0,0.1255
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1,2048,1,0,0.5970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1,3072,1,0,0.9812
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1,4096,1,0,1.3782
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1,10240,1,0,3.8925
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1,8192,1,0,2.9529
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1,6144,1,0,2.1525
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1,12288,1,0,4.8235
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,2,1,1,0,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1,16384,1,0,6.8539
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,2,16,1,0,0.1070
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1,32768,1,0,17.5174
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,2,32,1,0,0.1117
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,2,64,1,0,0.1188
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,2,128,1,0,0.1373
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,2,256,1,0,0.1844
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,2,512,1,0,0.2873
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,2,1024,1,0,0.5043
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,2,1536,1,0,0.7928
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,128,1024,1,0,30.0110
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,2,2048,1,0,1.1147
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,2,3072,1,0,1.8362
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,2,4096,1,0,2.5524
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,2,6144,1,0,4.1667
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,2,8192,1,0,5.8467
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,2,12288,1,0,9.6692
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,2,16384,1,0,13.9641
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,2,10240,1,0,7.7834
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,4,16,1,0,0.1093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1,1,1,0,0.0929
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,4,32,1,0,0.1210
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,2,32768,1,0,35.3376
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,4,64,1,0,0.1378
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,4,128,1,0,0.1789
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,4,256,1,0,0.2705
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,4,1024,1,0,0.9351
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,4,1536,1,0,1.4741
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,4,3072,1,0,3.5332
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,4,4096,1,0,5.0469
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,4,2048,1,0,2.0571
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,4,6144,1,0,8.3548
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,4,10240,1,0,15.6775
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,4,8192,1,0,11.9680
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,4,512,1,0,0.4615
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1,1536,1,0,0.4426
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,4,12288,1,0,19.6425
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,8,1,1,0,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,8,16,1,0,0.1175
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,4,16384,1,0,28.0564
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,8,32,1,0,0.1368
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,8,64,1,0,0.1778
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,8,256,1,0,0.4393
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,8,128,1,0,0.2603
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,8,512,1,0,0.8565
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,8,1536,1,0,2.7884
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,4,32768,1,0,70.3826
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,8,3072,1,0,7.1246
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,8,1024,1,0,1.6875
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,8,4096,1,0,10.3676
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,8,2048,1,0,4.0295
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,8,6144,1,0,17.0591
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,8,10240,1,0,31.1845
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,16,16,1,0,0.1360
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,8,8192,1,0,23.9508
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,16,32,1,0,0.1788
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,16,1,1,0,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,16,64,1,0,0.2610
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,8,12288,1,0,38.7445
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,16,128,1,0,0.4240
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,8,16384,1,0,56.0314
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,16,1024,1,0,3.3259
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,16,512,1,0,1.5394
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,16,256,1,0,0.8094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,16,2048,1,0,8.2792
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,16,3072,1,0,14.4508
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,32,1,1,0,0.1071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,4,1,1,0,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,16,4096,1,0,20.7229
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,16,1536,1,0,5.6891
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,32,16,1,0,0.1790
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,32,64,1,0,0.4222
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,32,256,1,0,1.4329
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,16,8192,1,0,48.1418
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,32,512,1,0,2.9772
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,16,6144,1,0,34.0310
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,32,1024,1,0,6.8523
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,32,128,1,0,0.7730
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,32,32,1,0,0.2624
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,64,1,1,0,0.1095
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,32,2048,1,0,16.6882
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,64,32,1,0,0.4224
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,64,64,1,0,0.7758
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,64,16,1,0,0.2606
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,32,3072,1,0,28.9862
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,64,256,1,0,2.7695
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,64,128,1,0,1.3818
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,64,512,1,0,6.1781
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,128,16,1,0,0.4255
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,64,1536,1,0,22.9959
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,128,1,1,0,0.1214
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,64,1024,1,0,13.8430
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,128,32,1,0,0.7743
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,128,128,1,0,2.6441
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,128,64,1,0,1.3814
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,32,4096,1,0,41.6797
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,64,2048,1,0,33.6256
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,128,256,1,0,5.8001
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,256,1,1,0,0.1397
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,128,512,1,0,12.4863
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,256,32,1,0,1.3782
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,256,128,1,0,5.5331
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,256,64,1,0,2.6599
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,128,1024,1,0,27.6051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,256,16,1,0,0.7839
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,256,256,1,0,11.7654
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1,16,1,0,0.1064
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1,128,1,0,0.1163
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,256,512,1,0,24.8482
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1,64,1,0,0.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1,32,1,0,0.1051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1,256,1,0,0.1377
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1,1,1,0,0.0927
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1,1024,1,0,0.2987
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1,1536,1,0,0.4347
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1,2048,1,0,0.5868
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1,4096,1,0,1.3450
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1,8192,1,0,2.9281
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1,6144,1,0,2.1311
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1,10240,1,0,3.8334
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1,16384,1,0,6.7544
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1,12288,1,0,4.7403
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,2,1,1,0,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,2,16,1,0,0.1051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,2,64,1,0,0.1177
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1,512,1,0,0.1890
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1,32768,1,0,17.4267
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,2,128,1,0,0.1343
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,2,32,1,0,0.1080
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,2,256,1,0,0.1823
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,2,1024,1,0,0.4934
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,2,1536,1,0,0.7777
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,2,3072,1,0,1.8056
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,2,512,1,0,0.2778
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,2,6144,1,0,4.1002
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,2,10240,1,0,7.6561
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,2,8192,1,0,5.7016
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,2,12288,1,0,9.5554
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,4,1,1,0,0.1020
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,4,16,1,0,0.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,4,32,1,0,0.1196
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,4,64,1,0,0.1338
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,4,128,1,0,0.1772
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,4,256,1,0,0.2685
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,4,512,1,0,0.4502
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,4,1024,1,0,0.9132
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,4,1536,1,0,1.4313
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,4,2048,1,0,2.0061
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,4,3072,1,0,3.4622
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,4,4096,1,0,4.9025
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,4,6144,1,0,8.2288
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,4,8192,1,0,11.7160
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,4,10240,1,0,15.4154
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,4,12288,1,0,19.2301
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,4,16384,1,0,27.7135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,8,1,1,0,0.1009
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,8,32,1,0,0.1357
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,8,64,1,0,0.1749
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,4,32768,1,0,69.7085
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,8,128,1,0,0.2577
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,8,256,1,0,0.4294
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,8,512,1,0,0.8298
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,8,1024,1,0,1.6501
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,8,1536,1,0,2.7173
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,8,2048,1,0,3.8964
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,8,4096,1,0,10.1042
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,8,6144,1,0,16.6787
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,8,10240,1,0,30.7119
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,16,1,1,0,0.1029
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,2,32768,1,0,34.8603
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,16,16,1,0,0.1389
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,16,32,1,0,0.1766
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,16,64,1,0,0.2567
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,8,16384,1,0,55.3538
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,16,128,1,0,0.4132
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,16,256,1,0,0.7866
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,16,512,1,0,1.4829
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,16,1024,1,0,3.1910
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,16,1536,1,0,5.5004
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,16,2048,1,0,8.0703
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,16,3072,1,0,14.1663
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,16,4096,1,0,20.3745
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,32,1,1,0,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,16,6144,1,0,33.2050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,32,16,1,0,0.1769
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,32,32,1,0,0.2569
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,16,8192,1,0,47.1216
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,32,64,1,0,0.4121
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,32,128,1,0,0.7528
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,32,256,1,0,1.3945
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,32,512,1,0,2.8444
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,32,1024,1,0,6.6446
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,32,1536,1,0,11.2202
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,32,2048,1,0,16.3354
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,8,16,1,0,0.1196
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,64,1,1,0,0.1093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,32,3072,1,0,28.1849
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,64,16,1,0,0.2567
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,32,4096,1,0,40.6242
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,64,64,1,0,0.7495
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,8,3072,1,0,6.9882
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,64,128,1,0,1.3305
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,64,256,1,0,2.6735
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,64,32,1,0,0.4146
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,64,512,1,0,5.9719
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,64,1024,1,0,13.4413
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,128,1,1,0,0.1179
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,128,16,1,0,0.4148
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,8,8192,1,0,23.7499
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,128,32,1,0,0.7513
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,64,2048,1,0,32.6052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,64,1536,1,0,22.4109
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,128,64,1,0,1.3260
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,128,256,1,0,5.6848
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,128,512,1,0,12.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,256,16,1,0,0.7582
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,256,1,1,0,0.1382
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,256,32,1,0,1.3278
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,128,1024,1,0,26.8177
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,8,12288,1,0,38.4177
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,256,64,1,0,2.5609
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,256,128,1,0,5.3568
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1,16,1,0,0.1029
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,256,256,1,0,11.5349
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,256,512,1,0,24.1623
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1,128,1,0,0.1177
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1,32,1,0,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1,256,1,0,0.1364
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1,1,1,0,0.0889
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1,64,1,0,0.1091
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1,1536,1,0,0.4317
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1,2048,1,0,0.5845
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1,1024,1,0,0.2995
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1,4096,1,0,1.3359
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1,8192,1,0,2.8931
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1,3072,1,0,0.9578
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1,10240,1,0,3.8346
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1,6144,1,0,2.1155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1,512,1,0,0.1859
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1,16384,1,0,6.7151
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,2,1,1,0,0.0987
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1,32768,1,0,17.2659
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1,12288,1,0,4.7153
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,2,32,1,0,0.1090
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,2,16,1,0,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,2,128,1,0,0.1336
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,2,512,1,0,0.2764
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,2,1024,1,0,0.4889
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,2,2048,1,0,1.0909
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,2,1536,1,0,0.7705
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,2,256,1,0,0.1800
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,2,3072,1,0,1.7906
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,2,6144,1,0,4.0597
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,2,64,1,0,0.1157
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,2,10240,1,0,7.6386
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,2,12288,1,0,9.4795
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,2,8192,1,0,5.7081
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,4,1,1,0,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,2,4096,1,0,2.4843
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,2,16384,1,0,13.6864
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,4,64,1,0,0.1333
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,2,32768,1,0,34.6511
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,4,16,1,0,0.1092
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,4,128,1,0,0.1748
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,4,32,1,0,0.1154
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,4,256,1,0,0.2685
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,4,512,1,0,0.4486
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,4,2048,1,0,1.9948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,4,3072,1,0,3.4462
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,4,1024,1,0,0.9087
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,4,10240,1,0,15.3248
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,4,8192,1,0,11.6084
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,4,12288,1,0,19.1658
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,4,4096,1,0,4.8628
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,8,16,1,0,0.1176
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,4,16384,1,0,27.6636
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,4,1536,1,0,1.4163
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,8,1,1,0,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,8,32,1,0,0.1339
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,8,64,1,0,0.1750
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,8,256,1,0,0.4290
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,8,128,1,0,0.2566
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,8,1024,1,0,1.6247
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,8,512,1,0,0.8192
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,4,32768,1,0,69.6226
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,8,1536,1,0,2.7008
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,8,4096,1,0,9.9904
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,8,3072,1,0,6.9690
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,8,2048,1,0,3.8637
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,8,6144,1,0,16.5282
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,16,16,1,0,0.1339
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,8,10240,1,0,30.6672
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,8,8192,1,0,23.4135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,16,1,1,0,0.1036
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,16,32,1,0,0.1767
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,16,64,1,0,0.2550
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,16,128,1,0,0.4111
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,8,12288,1,0,38.0483
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,16,512,1,0,1.4690
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,8,16384,1,0,54.8703
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,16,1024,1,0,3.1387
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,16,1536,1,0,5.4287
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,16,256,1,0,0.7775
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,16,2048,1,0,8.0568
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,16,3072,1,0,14.1059
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,32,1,1,0,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,32,32,1,0,0.2544
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,16,4096,1,0,20.3168
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,16,6144,1,0,32.9308
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,32,16,1,0,0.1749
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,32,64,1,0,0.4107
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,32,128,1,0,0.7423
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,32,256,1,0,1.3888
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,16,8192,1,0,46.7132
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,32,1536,1,0,11.0835
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,32,1024,1,0,6.5312
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,64,1,1,0,0.1090
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,64,32,1,0,0.4075
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,32,2048,1,0,16.2494
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,64,16,1,0,0.2566
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,32,3072,1,0,27.9201
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,32,4096,1,0,40.6036
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,64,64,1,0,0.7419
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,64,256,1,0,2.6541
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,64,128,1,0,1.3127
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,64,512,1,0,5.8700
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,64,1536,1,0,22.3921
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,128,16,1,0,0.4086
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,64,1024,1,0,13.2555
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,128,1,1,0,0.1174
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,64,2048,1,0,32.1451
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,128,32,1,0,0.7393
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,128,64,1,0,1.3235
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,128,512,1,0,12.0415
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,256,1,1,0,0.1369
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,128,128,1,0,2.5102
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,256,16,1,0,0.7489
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,128,1024,1,0,26.4103
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,128,256,1,0,5.5232
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,256,32,1,0,1.3166
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1,1,1,0,0.0887
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,256,128,1,0,5.2824
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1,16,1,0,0.1043
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,256,256,1,0,11.3709
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1,32,1,0,0.1028
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1,64,1,0,0.1074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,256,512,1,0,23.7321
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1,128,1,0,0.1156
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1,256,1,0,0.1347
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1,512,1,0,0.1855
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1,1024,1,0,0.2975
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1,1536,1,0,0.4310
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1,3072,1,0,0.9612
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1,2048,1,0,0.5836
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1,4096,1,0,1.3307
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1,8192,1,0,2.9036
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1,10240,1,0,3.8172
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1,16384,1,0,6.6947
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1,12288,1,0,4.7158
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,2,1,1,0,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,2,16,1,0,0.1018
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1,32768,1,0,17.2182
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,2,128,1,0,0.1319
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,2,64,1,0,0.1174
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,2,256,1,0,0.1805
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,2,512,1,0,0.2759
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,2,1024,1,0,0.4902
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,2,1536,1,0,0.7670
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,2,2048,1,0,1.0727
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,2,3072,1,0,1.7987
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,2,4096,1,0,2.4886
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,2,6144,1,0,4.0482
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,2,10240,1,0,7.5886
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,2,12288,1,0,9.4618
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,2,16384,1,0,13.7043
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,4,1,1,0,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,4,32,1,0,0.1151
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,2,32768,1,0,34.6956
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,4,16,1,0,0.1076
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,4,64,1,0,0.1318
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,4,128,1,0,0.1746
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,4,256,1,0,0.2635
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,4,512,1,0,0.4462
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,4,1024,1,0,0.8920
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,4,1536,1,0,1.4219
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,4,2048,1,0,1.9868
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,4,6144,1,0,8.1343
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,4,4096,1,0,4.8480
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,4,8192,1,0,11.6480
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,4,10240,1,0,15.3244
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,4,12288,1,0,19.0333
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,8,1,1,0,0.1029
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,4,16384,1,0,27.4333
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,8,16,1,0,0.1152
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,8,32,1,0,0.1333
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,8,64,1,0,0.1746
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,8,128,1,0,0.2540
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,8,256,1,0,0.4269
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,4,32768,1,0,69.4047
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,8,512,1,0,0.8113
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,2,32,1,0,0.1074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,8,1536,1,0,2.6696
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,8,2048,1,0,3.8666
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,8,4096,1,0,9.9629
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,8,6144,1,0,16.5671
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,8,8192,1,0,23.2972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,8,10240,1,0,30.4769
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,16,1,1,0,0.1009
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,2,8192,1,0,5.6517
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,8,12288,1,0,37.7261
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,16,16,1,0,0.1337
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,16,32,1,0,0.1755
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,16,64,1,0,0.2539
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,8,16384,1,0,55.0598
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,16,128,1,0,0.4072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,16,512,1,0,1.4513
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,16,256,1,0,0.7775
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,16,1024,1,0,3.1210
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,16,3072,1,0,13.9447
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,16,2048,1,0,7.9386
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,16,4096,1,0,20.2056
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,4,3072,1,0,3.4081
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,32,1,1,0,0.1040
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,16,6144,1,0,32.7892
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,32,16,1,0,0.1728
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,32,32,1,0,0.2535
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,32,64,1,0,0.4071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,16,8192,1,0,46.4866
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,32,128,1,0,0.7373
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,32,256,1,0,1.3759
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,32,512,1,0,2.7891
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,32,1024,1,0,6.5056
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,32,1536,1,0,10.9929
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,64,1,1,0,0.1074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,32,2048,1,0,16.0404
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,64,16,1,0,0.2538
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,32,3072,1,0,27.9398
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,64,32,1,0,0.4075
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,64,64,1,0,0.7364
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,32,4096,1,0,40.0903
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,64,128,1,0,1.2988
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,8,1024,1,0,1.6186
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,64,256,1,0,2.6423
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,64,512,1,0,5.8382
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,64,1024,1,0,13.1659
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,128,1,1,0,0.1156
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,64,1536,1,0,21.9758
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,128,64,1,0,1.2996
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,128,128,1,0,2.4915
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,128,16,1,0,0.4088
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,64,2048,1,0,31.9551
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,128,256,1,0,5.4963
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,128,512,1,0,11.8134
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,256,1,1,0,0.1340
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,256,32,1,0,1.2996
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,256,64,1,0,2.5213
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,128,1024,1,0,26.2108
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,256,16,1,0,0.7396
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,256,128,1,0,5.2327
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,256,256,1,0,11.1090
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,256,512,1,0,23.5368
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,32,1,0,0.1873
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,16,1,0,0.1851
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,64,1,0,0.2018
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,128,1,0,0.2269
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,256,1,0,0.2777
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,512,1,0,0.4179
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,1536,1,0,1.1773
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,2048,1,0,1.7289
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,3072,1,0,3.2527
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,1,1,0,0.1770
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,4096,1,0,4.6015
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,6144,1,0,6.9012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,8192,1,0,9.4838
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,10240,1,0,12.1285
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,12288,1,0,14.7685
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,1,1,0,0.1822
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,16384,1,0,21.5813
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,16,1,0,0.1900
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,32,1,0,0.2030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,64,1,0,0.2282
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,128,1,0,0.2706
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,32768,1,0,46.3170
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,512,1,0,0.7123
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,256,1,0,0.4015
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,1024,1,0,1.4982
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,1536,1,0,2.3712
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,2048,1,0,3.4360
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,3072,1,0,6.4464
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,128,32,1,0,0.7428
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,6144,1,0,13.9410
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,4096,1,0,9.0461
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,8192,1,0,20.4629
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,10240,1,0,24.3927
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,12288,1,0,29.7162
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,16,1,0,0.2015
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,1,1,0,0.1885
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,32,1,0,0.2296
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,16384,1,0,40.3813
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,64,1,0,0.2742
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,128,1,0,0.3925
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,256,1,0,0.6892
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,512,1,0,1.3756
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,1024,1,0,2.9394
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,2048,1,0,7.2345
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,1536,1,0,4.9675
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,32768,1,0,92.8762
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,3072,1,0,12.6082
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,4096,1,0,17.9504
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,6144,1,0,27.7408
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,8192,1,0,39.6639
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,1,1,0,0.1859
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,1024,1,0,0.7687
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,10240,1,0,49.3848
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,16,1,0,0.2289
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,12288,1,0,59.7416
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,64,1,0,0.3968
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,128,1,0,0.6779
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,32,1,0,0.2742
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,256,1,0,1.3348
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,512,1,0,2.7691
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,16384,1,0,84.3297
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,1024,1,0,6.0697
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,2048,1,0,14.2584
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,1536,1,0,9.9769
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,3072,1,0,25.1213
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,4096,1,0,35.6926
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,1,1,0,0.1887
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,16,1,0,0.2733
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,6144,1,0,59.1706
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,32768,1,0,192.3636
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,8192,1,0,80.2234
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,32,1,0,0.3980
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,64,1,0,0.6834
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,128,1,0,1.2995
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,10240,1,0,101.1206
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,256,1,0,2.6184
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,512,1,0,5.5784
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,1024,1,0,12.4017
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,12288,1,0,125.3128
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,1536,1,0,20.3654
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,2048,1,0,28.6869
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,1,1,0,0.1937
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,3072,1,0,51.8261
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,16384,1,0,175.2047
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,16,1,0,0.4010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,32,1,0,0.6776
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,64,1,0,1.3214
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,4096,1,0,71.9102
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,128,1,0,2.5046
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,256,1,0,5.6146
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,512,1,0,11.1056
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,1024,1,0,24.4070
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,6144,1,0,116.6163
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,1,1,0,0.2146
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,1536,1,0,41.7430
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,16,1,0,0.6930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,32,1,0,1.3153
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,2048,1,0,59.6856
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,8192,1,0,163.3247
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,64,1,0,2.5164
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,128,1,0,5.3453
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,256,1,0,11.1239
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,3072,1,0,106.4168
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,512,1,0,22.8666
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,128,1,1,0,0.2539
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,128,16,1,0,1.3356
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,1024,1,0,50.6260
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,128,32,1,0,2.6267
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,4096,1,0,148.6575
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,128,128,1,0,10.9728
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,128,64,1,0,5.3247
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,128,256,1,0,22.3186
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,1536,1,0,85.1190
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,256,1,1,0,0.3239
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,256,16,1,0,2.7288
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,256,32,1,0,5.3061
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,2048,1,0,120.6047
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,128,512,1,0,48.1850
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,256,64,1,0,10.3382
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1,1,0,0.1259
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,256,128,1,0,20.9339
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,16,1,0,0.1333
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,32,1,0,0.1397
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,64,1,0,0.1501
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,256,256,1,0,44.4667
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,128,1,0,0.1658
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,128,1024,1,0,104.4291
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,256,1,0,0.2248
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1024,1,0,0.8395
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,512,1,0,0.3794
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1536,1,0,1.4744
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,2048,1,0,2.3250
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,6144,1,0,10.0924
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,4096,1,0,6.4041
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,256,512,1,0,99.2302
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,3072,1,0,4.5341
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,8192,1,0,13.8781
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,12288,1,0,21.7629
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,10240,1,0,17.7015
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,16,1,0,0.1384
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,32,1,0,0.1501
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,64,1,0,0.1667
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,16384,1,0,29.6749
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1,1,0,0.1329
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,128,1,0,0.2098
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,512,1,0,0.6519
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,256,1,0,0.3333
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1024,1,0,1.6148
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,32768,1,0,64.3028
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,2048,1,0,4.6406
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1536,1,0,2.9338
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,4096,1,0,12.7764
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,3072,1,0,8.8864
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,8192,1,0,28.0640
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1,1,0,0.1346
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,10240,1,0,35.4404
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,16,1,0,0.1503
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,6144,1,0,20.0630
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,32,1,0,0.1658
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,12288,1,0,43.7704
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,16384,1,0,59.9448
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,64,1,0,0.2097
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,128,1,0,0.3084
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,256,1,0,0.5623
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,512,1,0,1.2747
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1536,1,0,5.8702
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1024,1,0,3.1868
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,2048,1,0,9.2960
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,3072,1,0,17.5306
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,32768,1,0,131.3790
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,4096,1,0,25.3758
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,6144,1,0,39.9691
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1,1,0,0.1346
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,16,1,0,0.1675
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,8192,1,0,56.2488
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,32,1,0,0.2094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,64,1,0,0.3092
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,10240,1,0,71.6831
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,128,1,0,0.5116
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,256,1,0,1.0749
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,512,1,0,2.4901
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,12288,1,0,90.0196
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1024,1,0,6.2676
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1536,1,0,11.7562
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,2048,1,0,18.6395
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,16384,1,0,123.4526
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,3072,1,0,34.7175
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,4096,1,0,49.8795
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1,1,0,0.1357
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,16,1,0,0.2105
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,6144,1,0,82.3901
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,32,1,0,0.3093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,64,1,0,0.5134
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,8192,1,0,115.3953
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,128,1,0,0.9792
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,256,1,0,2.0775
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,32768,1,0,265.4112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,512,1,0,4.9075
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,10240,1,0,147.9238
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1024,1,0,12.7292
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1536,1,0,23.7435
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,2048,1,0,37.4820
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,12288,1,0,180.0121
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1,1,0,0.1424
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,16,1,0,0.3101
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,32,1,0,0.5128
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,3072,1,0,70.5659
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,64,1,0,0.9759
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,128,1,0,1.8837
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,256,1,0,4.0325
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,16384,1,0,250.3100
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,4096,1,0,100.5537
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,512,1,0,9.8668
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1024,1,0,25.3477
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1,1,0,0.1561
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1536,1,0,48.7015
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,16,1,0,0.5179
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,6144,1,0,167.6769
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,32,1,0,0.9756
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,64,1,0,1.8897
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,2048,1,0,76.7735
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,128,1,0,3.6770
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,256,1,0,8.0346
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,512,1,0,20.0919
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,8192,1,0,234.5386
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,1,1,0,0.1778
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,16,1,0,0.9904
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,3072,1,0,143.1878
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1024,1,0,53.4128
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,32,1,0,1.8958
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,64,1,0,3.6440
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,128,1,0,7.5786
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,1,1,0,0.2363
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,256,1,0,16.6919
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1536,1,0,99.8874
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,16,1,0,1.9127
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,4096,1,0,209.2166
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,512,1,0,42.3124
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,32,1,0,3.7378
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,64,1,0,7.5296
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,128,1,0,15.1491
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,2048,1,0,158.1427
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,16,1,0,0.1110
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,32,1,0,0.1135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,256,1,0,34.1034
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,64,1,0,0.1188
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,128,1,0,0.1315
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1,1,0,0.1050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,1024,1,0,108.6703
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,256,1,0,0.1687
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1024,1,0,0.5213
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,512,1,0,0.2616
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1536,1,0,0.8610
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,2048,1,0,1.3078
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,512,1,0,87.3460
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,4096,1,0,3.8094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,8192,1,0,7.8000
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,6144,1,0,5.7821
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,3072,1,0,2.7034
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,12288,1,0,12.2841
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1,1,0,0.1105
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,16384,1,0,16.8502
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,32,1,0,0.1190
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,10240,1,0,10.0191
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,16,1,0,0.1142
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,32768,1,0,37.2222
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,64,1,0,0.1328
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,128,1,0,0.1607
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,256,1,0,0.2359
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,512,1,0,0.4321
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1024,1,0,0.9679
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,2048,1,0,2.5788
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1536,1,0,1.6683
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,3072,1,0,5.0974
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,4096,1,0,7.2945
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,8192,1,0,15.7018
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,12288,1,0,24.7018
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1,1,0,0.1100
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,10240,1,0,19.9258
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,6144,1,0,11.2975
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,16,1,0,0.1204
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,32,1,0,0.1304
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,64,1,0,0.1589
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,512,1,0,0.7828
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,128,1,0,0.2229
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1536,1,0,3.3006
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,16384,1,0,33.7470
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,256,1,0,0.3869
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,32768,1,0,76.5685
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1024,1,0,1.8698
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,3072,1,0,10.0902
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,2048,1,0,5.0933
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,4096,1,0,14.1677
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,6144,1,0,22.4717
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1,1,0,0.1106
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,8192,1,0,31.3091
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,16,1,0,0.1319
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,10240,1,0,40.3464
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,32,1,0,0.1602
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,64,1,0,0.2229
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,128,1,0,0.3596
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,256,1,0,0.6917
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,12288,1,0,49.9014
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,512,1,0,1.4960
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,16384,1,0,69.5937
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1024,1,0,3.5857
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1536,1,0,6.5440
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,2048,1,0,10.1854
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,3072,1,0,19.5338
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,4096,1,0,28.0738
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1,1,0,0.1124
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,6144,1,0,45.4646
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,32768,1,0,157.0368
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,16,1,0,0.1625
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,32,1,0,0.2230
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,8192,1,0,64.1574
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,64,1,0,0.3623
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,10240,1,0,81.9816
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,128,1,0,0.6362
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,256,1,0,1.3081
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,512,1,0,2.9032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,12288,1,0,100.2017
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1024,1,0,7.2788
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1536,1,0,13.3754
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,2048,1,0,20.6683
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1,1,0,0.1150
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,3072,1,0,39.4525
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,16384,1,0,142.2200
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,16,1,0,0.2240
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,32,1,0,0.3606
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,4096,1,0,57.5794
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,64,1,0,0.6417
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,512,1,0,5.8027
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,128,1,0,1.2213
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,256,1,0,2.4599
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1024,1,0,14.6055
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,6144,1,0,92.5500
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1536,1,0,27.0841
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1,1,0,0.1214
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,16,1,0,0.3626
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,32,1,0,0.6420
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,2048,1,0,42.1879
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,8192,1,0,131.4764
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,64,1,0,1.2154
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,128,1,0,2.2739
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,256,1,0,4.9156
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,3072,1,0,79.8234
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,512,1,0,12.0001
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,1,1,0,0.1370
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,16,1,0,0.6424
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1024,1,0,29.9059
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,32,1,0,1.2247
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,4096,1,0,115.4898
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,64,1,0,2.2912
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,128,1,0,4.4758
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,256,1,0,10.3302
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1536,1,0,56.9744
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,1,1,0,0.1737
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,16,1,0,1.2292
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,512,1,0,24.6873
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,32,1,0,2.2834
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,2048,1,0,86.4472
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,64,1,0,4.5638
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,128,1,0,9.4924
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,16,1,0,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,32,1,0,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,1024,1,0,59.9761
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,256,1,0,20.3241
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,64,1,0,0.1050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,128,1,0,0.1101
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,256,1,0,0.1381
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,1,1,0,0.1114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,512,1,0,0.2001
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,512,1,0,50.4234
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,1024,1,0,0.3584
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,1536,1,0,0.5584
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,2048,1,0,0.8390
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,3072,1,0,1.7920
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,8192,1,0,4.8233
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,6144,1,0,3.6099
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,10240,1,0,6.2192
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,4096,1,0,2.4624
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,12288,1,0,7.6364
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,16,1,0,0.1008
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,16384,1,0,10.5540
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,1,1,0,0.1005
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,32,1,0,0.1046
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,256,1,0,0.1874
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,64,1,0,0.1118
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,32768,1,0,24.2761
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,512,1,0,0.3151
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,128,1,0,0.1315
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,1536,1,0,1.0457
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,3072,1,0,3.2878
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,1024,1,0,0.6425
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,2048,1,0,1.5800
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,8192,1,0,9.7404
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,6144,1,0,6.8993
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,10240,1,0,12.2402
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,16384,1,0,21.0783
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,4096,1,0,4.5642
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,12288,1,0,15.0773
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,32,1,0,0.1113
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,1,1,0,0.0994
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,16,1,0,0.1042
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,64,1,0,0.1321
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,128,1,0,0.1764
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,32768,1,0,49.4246
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,1024,1,0,1.2032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,1536,1,0,2.0299
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,256,1,0,0.2883
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,2048,1,0,3.0221
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,512,1,0,0.5533
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,6144,1,0,13.4955
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,8192,1,0,19.0942
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,3072,1,0,6.3428
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,10240,1,0,24.7303
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,4096,1,0,8.7389
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,1,1,0,0.0993
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,16,1,0,0.1108
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,64,1,0,0.1787
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,32,1,0,0.1319
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,256,1,0,0.5003
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,16384,1,0,42.8860
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,12288,1,0,30.4117
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,128,1,0,0.2723
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,512,1,0,1.0115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,1024,1,0,2.2650
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,1536,1,0,3.9403
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,2048,1,0,5.9381
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,32768,1,0,100.2767
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,3072,1,0,11.8429
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,6144,1,0,27.4937
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,4096,1,0,17.0236
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,8192,1,0,38.8523
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,1,1,0,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,16,1,0,0.1319
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,32,1,0,0.1760
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,64,1,0,0.2750
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,12288,1,0,61.6848
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,256,1,0,0.9232
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,10240,1,0,49.7783
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,512,1,0,1.8904
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,128,1,0,0.4744
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,1024,1,0,4.4711
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,16384,1,0,87.0667
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,1536,1,0,7.9697
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,3072,1,0,24.0693
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,2048,1,0,12.3842
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,1,1,0,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,4096,1,0,34.5686
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,32,1,0,0.2730
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,16,1,0,0.1788
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,64,1,0,0.4774
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,256,1,0,1.7005
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,128,1,0,0.8792
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,6144,1,0,55.4685
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,512,1,0,3.7516
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,1024,1,0,9.2757
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,1536,1,0,16.4065
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,1,1,0,0.1058
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,8192,1,0,79.7932
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,16,1,0,0.2755
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,32,1,0,0.4758
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,64,1,0,0.8734
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,2048,1,0,25.1908
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,3072,1,0,48.5991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,128,1,0,1.5888
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,256,1,0,3.3268
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,512,1,0,7.9311
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,4096,1,0,70.2839
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,1024,1,0,18.9954
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,1,1,0,0.1147
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,32,1,0,0.8772
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,16,1,0,0.4768
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,1536,1,0,33.5065
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,64,1,0,1.5976
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,128,1,0,3.1724
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,256,1,0,7.0133
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,2048,1,0,50.8812
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,256,1,1,0,0.1402
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,256,16,1,0,0.8758
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,512,1,0,16.2115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,256,32,1,0,1.6004
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,256,128,1,0,6.6935
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,1024,1,0,39.1547
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,256,256,1,0,14.3550
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,256,64,1,0,3.1520
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,1,1,0,0.0787
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,16,1,0,0.0840
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,128,1,0,0.1014
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,256,512,1,0,33.0215
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,32,1,0,0.0871
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,64,1,0,0.0947
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,256,1,0,0.1258
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,1024,1,0,0.3196
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,1536,1,0,0.4949
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,512,1,0,0.1801
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,4096,1,0,2.2329
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,2048,1,0,0.7363
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,6144,1,0,3.1686
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,12288,1,0,6.7951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,16384,1,0,9.4444
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,8192,1,0,4.2561
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,10240,1,0,5.4861
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,3072,1,0,1.6091
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,32,1,0,0.0956
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,1,1,0,0.0847
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,64,1,0,0.1009
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,32768,1,0,21.9179
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,16,1,0,0.0848
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,128,1,0,0.1207
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,1024,1,0,0.5676
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,1536,1,0,0.9201
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,2048,1,0,1.3767
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,4096,1,0,4.1097
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,6144,1,0,6.1261
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,256,1,0,0.1705
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,8192,1,0,8.4887
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,512,1,0,0.2769
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,10240,1,0,10.8170
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,3072,1,0,2.9570
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,1,1,0,0.0844
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,16384,1,0,18.6677
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,32,1,0,0.1022
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,12288,1,0,13.2982
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,16,1,0,0.0963
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,128,1,0,0.1640
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,32768,1,0,44.6000
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,256,1,0,0.2561
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,64,1,0,0.1207
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,512,1,0,0.4854
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,2048,1,0,2.5820
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,1536,1,0,1.7401
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,4096,1,0,7.7649
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,6144,1,0,11.9509
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,1024,1,0,1.0551
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,8192,1,0,16.9505
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,10240,1,0,21.6255
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,3072,1,0,5.5647
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,1,1,0,0.0837
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,12288,1,0,26.7715
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,32,1,0,0.1201
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,16,1,0,0.1021
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,128,1,0,0.2425
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,256,1,0,0.4459
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,64,1,0,0.1626
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,1536,1,0,3.4330
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,16384,1,0,37.8006
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,1024,1,0,1.9709
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,2048,1,0,5.1742
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,512,1,0,0.9028
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,32768,1,0,88.9655
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,3072,1,0,10.4856
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,4096,1,0,14.9381
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,6144,1,0,24.4183
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,1,1,0,0.0828
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,16,1,0,0.1213
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,10240,1,0,43.9684
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,64,1,0,0.2414
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,128,1,0,0.4240
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,8192,1,0,34.2052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,12288,1,0,54.3675
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,32,1,0,0.1629
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,256,1,0,0.8130
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,512,1,0,1.6621
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,1536,1,0,6.9753
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,2048,1,0,10.6405
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,16384,1,0,76.4008
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,1024,1,0,3.9164
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,3072,1,0,21.3824
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,1,1,0,0.0865
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,4096,1,0,30.6145
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,16,1,0,0.1627
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,32,1,0,0.2415
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,64,1,0,0.4241
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,512,1,0,3.2386
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,1024,1,0,8.0227
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,128,1,0,0.7755
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,8192,1,0,70.3093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,256,1,0,1.4624
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,6144,1,0,49.0234
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,1536,1,0,14.2217
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,1,1,0,0.0963
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,32,1,0,0.4240
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,2048,1,0,22.0288
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,16,1,0,0.2426
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,64,1,0,0.7702
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,3072,1,0,42.5927
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,128,1,0,1.3882
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,256,1,0,2.8805
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,1024,1,0,16.5656
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,16,1,0,0.4259
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,1,1,0,0.1030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,4096,1,0,60.9897
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,32,1,0,0.7752
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,1536,1,0,29.4503
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,2048,1,0,43.8544
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,512,1,0,6.8029
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,64,1,0,1.3801
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,128,1,0,2.7009
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,256,1,0,5.9725
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,256,64,1,0,2.6812
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,512,1,0,14.0745
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,256,128,1,0,5.6408
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,256,32,1,0,1.3815
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,1024,1,0,33.4089
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,256,16,1,0,0.7737
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,256,1,1,0,0.1250
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,256,256,1,0,12.4105
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1,16,1,0,0.0804
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1,32,1,0,0.0804
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1,256,1,0,0.1205
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1,128,1,0,0.0937
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1,1,1,0,0.0723
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1,512,1,0,0.1723
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,256,512,1,0,28.0333
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1,64,1,0,0.0865
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1,1536,1,0,0.4763
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1,2048,1,0,0.7131
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1,1024,1,0,0.3018
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1,6144,1,0,3.0895
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1,10240,1,0,5.3746
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1,4096,1,0,2.1366
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1,12288,1,0,6.6686
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1,3072,1,0,1.5980
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1,8192,1,0,4.1813
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,2,16,1,0,0.0793
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,2,32,1,0,0.0907
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1,32768,1,0,21.3916
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,2,64,1,0,0.0939
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,2,1,1,0,0.0785
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1,16384,1,0,9.2708
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,2,512,1,0,0.2626
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,2,1536,1,0,0.8813
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,2,128,1,0,0.1147
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,2,256,1,0,0.1649
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,2,2048,1,0,1.3198
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,2,1024,1,0,0.5403
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,2,4096,1,0,4.0199
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,2,3072,1,0,2.8579
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,2,12288,1,0,13.0753
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,4,1,1,0,0.0805
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,2,16384,1,0,18.3226
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,4,16,1,0,0.0870
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,2,10240,1,0,10.4840
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,2,6144,1,0,5.9773
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,2,32768,1,0,43.2194
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,4,32,1,0,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,2,8192,1,0,8.3103
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,4,128,1,0,0.1551
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,4,256,1,0,0.2401
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,4,64,1,0,0.1158
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,4,1024,1,0,1.0068
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,4,2048,1,0,2.5120
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,4,1536,1,0,1.6941
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,4,512,1,0,0.4622
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,4,6144,1,0,11.5184
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,4,4096,1,0,7.5876
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,4,8192,1,0,16.4620
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,4,3072,1,0,5.3662
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,8,1,1,0,0.0803
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,4,12288,1,0,26.3574
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,4,10240,1,0,21.2270
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,8,64,1,0,0.1548
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,8,32,1,0,0.1152
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,8,16,1,0,0.0948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,4,16384,1,0,37.3601
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,8,128,1,0,0.2284
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,8,512,1,0,0.8430
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,8,1024,1,0,1.8703
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,8,256,1,0,0.4189
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,4,32768,1,0,87.8964
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,8,3072,1,0,10.1882
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,8,1536,1,0,3.2793
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,8,4096,1,0,14.4539
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,8,2048,1,0,4.9561
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,8,8192,1,0,33.4592
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,16,1,1,0,0.0807
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,16,16,1,0,0.1151
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,16,32,1,0,0.1561
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,16,64,1,0,0.2266
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,8,12288,1,0,53.7026
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,8,10240,1,0,42.6513
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,16,128,1,0,0.3987
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,8,6144,1,0,23.2586
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,16,512,1,0,1.5495
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,16,256,1,0,0.7662
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,8,16384,1,0,74.9391
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,16,1024,1,0,3.6585
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,16,2048,1,0,10.3369
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,16,1536,1,0,6.7636
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,32,1,1,0,0.0803
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,16,3072,1,0,20.7535
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,32,16,1,0,0.1545
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,32,32,1,0,0.2305
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,16,4096,1,0,29.6343
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,32,128,1,0,0.7197
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,32,256,1,0,1.3859
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,32,512,1,0,3.0335
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,16,6144,1,0,47.9355
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,32,64,1,0,0.3986
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,32,1024,1,0,7.6710
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,32,1536,1,0,13.5836
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,64,1,1,0,0.0864
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,64,32,1,0,0.3996
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,32,2048,1,0,21.1799
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,64,16,1,0,0.2288
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,16,8192,1,0,67.1328
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,32,3072,1,0,42.0540
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,64,64,1,0,0.7220
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,64,128,1,0,1.2861
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,64,256,1,0,2.6823
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,64,512,1,0,6.4099
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,128,1,1,0,0.0954
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,64,1024,1,0,15.7464
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,32,4096,1,0,59.3638
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,128,16,1,0,0.3981
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,128,64,1,0,1.2844
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,128,128,1,0,2.4972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,64,1536,1,0,27.6898
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,128,32,1,0,0.7286
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,64,2048,1,0,42.5654
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,128,512,1,0,13.2998
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,256,1,1,0,0.1169
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,128,256,1,0,5.5344
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,256,16,1,0,0.7233
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,128,1024,1,0,31.3346
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,256,128,1,0,5.2713
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,256,64,1,0,2.5139
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,256,32,1,0,1.2916
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1,16,1,0,0.0778
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,256,256,1,0,11.5965
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1,256,1,0,0.1134
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,256,512,1,0,26.4211
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1,64,1,0,0.0861
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1,1,1,0,0.0662
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1,128,1,0,0.0909
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1,32,1,0,0.0788
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1,512,1,0,0.1668
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1,1024,1,0,0.2954
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1,1536,1,0,0.4723
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1,6144,1,0,3.0712
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1,4096,1,0,2.1554
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1,3072,1,0,1.5499
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1,8192,1,0,4.0908
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1,12288,1,0,6.5767
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1,2048,1,0,0.6900
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1,10240,1,0,5.2759
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,2,16,1,0,0.0791
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,2,32,1,0,0.0846
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,2,1,1,0,0.0750
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,2,256,1,0,0.1589
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1,16384,1,0,9.1424
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1,32768,1,0,21.3007
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,2,64,1,0,0.0908
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,2,512,1,0,0.2578
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,2,128,1,0,0.1104
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,2,1536,1,0,0.8621
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,2,2048,1,0,1.2870
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,2,3072,1,0,2.8322
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,2,4096,1,0,3.9878
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,2,6144,1,0,5.9786
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,2,10240,1,0,10.2482
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,2,8192,1,0,8.1883
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,2,1024,1,0,0.5311
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,4,1,1,0,0.0782
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,2,16384,1,0,18.0152
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,4,32,1,0,0.0908
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,2,12288,1,0,12.8803
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,4,64,1,0,0.1093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,4,128,1,0,0.1507
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,4,256,1,0,0.2344
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,4,16,1,0,0.0868
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,4,512,1,0,0.4469
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,2,32768,1,0,43.1016
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,4,1024,1,0,0.9851
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,4,1536,1,0,1.6625
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,4,4096,1,0,7.4846
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,4,6144,1,0,11.5148
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,4,3072,1,0,5.3809
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,4,2048,1,0,2.4274
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,8,1,1,0,0.0766
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,4,10240,1,0,21.1834
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,8,16,1,0,0.0930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,4,8192,1,0,16.1320
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,4,16384,1,0,36.8709
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,8,32,1,0,0.1105
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,8,64,1,0,0.1520
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,4,12288,1,0,25.7925
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,8,256,1,0,0.4084
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,8,512,1,0,0.8199
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,8,1024,1,0,1.8168
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,8,128,1,0,0.2215
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,8,2048,1,0,4.9052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,8,4096,1,0,14.3412
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,8,3072,1,0,10.0317
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,8,1536,1,0,3.1676
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,4,32768,1,0,87.2233
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,8,6144,1,0,23.1144
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,16,1,1,0,0.0784
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,16,32,1,0,0.1521
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,8,8192,1,0,33.1251
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,16,64,1,0,0.2221
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,8,12288,1,0,52.9755
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,16,16,1,0,0.1099
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,8,10240,1,0,42.3406
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,16,128,1,0,0.3858
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,16,256,1,0,0.7437
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,16,512,1,0,1.4983
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,16,1024,1,0,3.5712
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,8,16384,1,0,74.0559
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,16,1536,1,0,6.5448
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,16,2048,1,0,10.1723
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,32,1,1,0,0.0806
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,32,16,1,0,0.1514
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,16,3072,1,0,20.4600
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,16,4096,1,0,29.2879
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,32,32,1,0,0.2220
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,32,64,1,0,0.3861
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,16,6144,1,0,46.6880
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,32,128,1,0,0.6988
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,32,512,1,0,2.9716
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,32,1536,1,0,13.5335
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,32,2048,1,0,20.9614
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,32,1024,1,0,7.4611
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,64,1,1,0,0.0865
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,16,8192,1,0,66.7191
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,64,16,1,0,0.2230
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,32,3072,1,0,40.4621
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,32,256,1,0,1.3271
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,64,32,1,0,0.3835
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,64,64,1,0,0.7021
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,64,128,1,0,1.2532
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,64,256,1,0,2.5685
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,32,4096,1,0,58.5540
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,64,1024,1,0,15.3138
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,128,16,1,0,0.3868
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,64,512,1,0,6.3062
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,128,1,1,0,0.0910
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,64,1536,1,0,27.3953
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,128,64,1,0,1.2435
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,128,32,1,0,0.6988
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,64,2048,1,0,41.4325
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,128,256,1,0,5.4117
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,128,128,1,0,2.4083
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,256,16,1,0,0.7001
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,256,32,1,0,1.2419
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,128,512,1,0,13.0428
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,256,1,1,0,0.1121
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,256,256,1,0,11.1581
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,256,128,1,0,5.0779
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,128,1024,1,0,30.6742
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1,16,1,0,0.0754
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,256,64,1,0,2.3943
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1,32,1,0,0.0764
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,256,512,1,0,26.0318
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1,64,1,0,0.0852
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1,256,1,0,0.1128
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1,128,1,0,0.0899
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1,1024,1,0,0.2964
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1,512,1,0,0.1688
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1,1,1,0,0.0643
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1,4096,1,0,2.0939
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1,6144,1,0,3.0617
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1,3072,1,0,1.5344
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1,8192,1,0,4.0949
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1,10240,1,0,5.3234
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1,1536,1,0,0.4683
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1,12288,1,0,6.5309
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1,2048,1,0,0.6960
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,2,16,1,0,0.0763
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,2,1,1,0,0.0746
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,2,32,1,0,0.0848
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,2,64,1,0,0.0900
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,2,512,1,0,0.2538
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1,16384,1,0,9.0314
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,2,128,1,0,0.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1,32768,1,0,21.3156
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,2,1024,1,0,0.5253
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,2,1536,1,0,0.8572
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,2,256,1,0,0.1582
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,2,4096,1,0,3.9384
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,2,6144,1,0,5.8236
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,2,3072,1,0,2.8540
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,2,8192,1,0,8.2853
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,2,2048,1,0,1.2951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,2,16384,1,0,18.0657
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,4,1,1,0,0.0764
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,2,10240,1,0,10.2472
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,4,32,1,0,0.0910
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,2,12288,1,0,12.7829
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,4,16,1,0,0.0826
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,4,256,1,0,0.2329
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,4,128,1,0,0.1494
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,4,512,1,0,0.4441
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,4,1536,1,0,1.6458
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,4,1024,1,0,0.9809
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,2,32768,1,0,42.6880
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,4,64,1,0,0.1097
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,4,4096,1,0,7.3641
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,4,3072,1,0,5.3212
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,4,2048,1,0,2.4351
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,4,8192,1,0,16.1028
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,8,1,1,0,0.0751
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,8,16,1,0,0.0906
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,4,12288,1,0,25.4577
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,4,16384,1,0,36.7035
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,8,32,1,0,0.1121
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,4,6144,1,0,11.3228
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,4,10240,1,0,20.9758
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,8,64,1,0,0.1501
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,8,256,1,0,0.4060
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,8,512,1,0,0.8088
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,8,128,1,0,0.2192
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,8,1536,1,0,3.1557
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,4,32768,1,0,86.4508
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,8,3072,1,0,9.9407
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,8,2048,1,0,4.8430
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,8,1024,1,0,1.7899
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,8,4096,1,0,14.2004
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,8,6144,1,0,23.2696
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,16,16,1,0,0.1120
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,8,8192,1,0,32.9752
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,16,32,1,0,0.1483
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,16,1,1,0,0.0766
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,16,64,1,0,0.2195
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,8,10240,1,0,41.5859
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,16,128,1,0,0.3812
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,16,512,1,0,1.4793
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,8,16384,1,0,73.2816
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,8,12288,1,0,52.6875
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,16,1024,1,0,3.5531
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,16,256,1,0,0.7314
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,16,2048,1,0,9.9796
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,16,1536,1,0,6.5157
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,16,3072,1,0,20.2860
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,32,16,1,0,0.1500
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,32,1,1,0,0.0767
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,16,4096,1,0,28.9812
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,32,32,1,0,0.2192
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,32,64,1,0,0.3819
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,32,256,1,0,1.3069
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,16,6144,1,0,46.3952
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,32,512,1,0,2.9470
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,32,1024,1,0,7.4171
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,32,128,1,0,0.6867
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,32,1536,1,0,13.1957
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,64,1,1,0,0.0830
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,16,8192,1,0,66.7516
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,64,32,1,0,0.3820
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,64,16,1,0,0.2180
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,32,2048,1,0,20.5787
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,64,64,1,0,0.6879
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,32,3072,1,0,40.6957
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,64,128,1,0,1.2374
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,32,4096,1,0,58.1085
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,64,256,1,0,2.5546
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,64,512,1,0,6.1394
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,128,1,1,0,0.0896
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,64,1024,1,0,15.2371
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,128,32,1,0,0.6881
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,128,64,1,0,1.2330
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,128,16,1,0,0.3846
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,64,2048,1,0,40.9433
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,64,1536,1,0,27.1648
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,128,256,1,0,5.2679
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,128,128,1,0,2.3566
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,128,512,1,0,12.7488
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,256,1,1,0,0.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,256,32,1,0,1.2237
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,256,128,1,0,4.9305
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,256,64,1,0,2.3572
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,128,1024,1,0,30.2599
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,256,256,1,0,10.8977
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1,1,1,0,0.1701
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1,16,1,0,0.1830
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1,32,1,0,0.1864
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,256,512,1,0,25.2636
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,256,16,1,0,0.6940
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1,128,1,0,0.2258
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1,64,1,0,0.1995
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1,512,1,0,0.4117
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1,1024,1,0,0.7370
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1,1536,1,0,1.0784
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1,2048,1,0,1.5074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1,4096,1,0,3.8163
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1,256,1,0,0.2719
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1,3072,1,0,2.7192
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1,10240,1,0,9.7705
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1,6144,1,0,5.5995
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1,16384,1,0,16.8257
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1,12288,1,0,11.9918
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1,8192,1,0,7.6709
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,2,16,1,0,0.1875
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,2,64,1,0,0.2267
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,2,1,1,0,0.1815
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,2,32,1,0,0.1998
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,2,256,1,0,0.4020
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1,32768,1,0,37.2172
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,2,512,1,0,0.7112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,2,128,1,0,0.2688
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,2,1024,1,0,1.4205
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,2,1536,1,0,2.0856
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,2,3072,1,0,5.4781
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,2,6144,1,0,11.3227
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,2,4096,1,0,7.4242
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,2,2048,1,0,3.0614
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,2,8192,1,0,15.5873
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,4,1,1,0,0.1818
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,2,10240,1,0,19.0927
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,4,32,1,0,0.2251
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,2,16384,1,0,32.3510
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,4,16,1,0,0.1988
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,4,64,1,0,0.2706
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,4,128,1,0,0.4046
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,4,256,1,0,0.7012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,2,12288,1,0,24.2126
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,4,1536,1,0,4.3756
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,4,512,1,0,1.3720
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,2,32768,1,0,77.3294
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,4,1024,1,0,2.7538
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,4,2048,1,0,6.0996
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,4,4096,1,0,14.5172
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,4,3072,1,0,10.4568
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,4,6144,1,0,22.7306
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,4,10240,1,0,40.2701
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,8,1,1,0,0.1813
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,8,16,1,0,0.2287
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,8,32,1,0,0.2690
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,4,12288,1,0,49.2867
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,4,8192,1,0,30.6820
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,8,64,1,0,0.4061
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,8,128,1,0,0.6993
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,4,16384,1,0,67.8104
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,8,256,1,0,1.3460
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,8,512,1,0,2.7275
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,8,1536,1,0,8.7876
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,8,2048,1,0,12.2912
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,8,3072,1,0,20.2959
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,8,1024,1,0,5.7605
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,8,6144,1,0,45.2075
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,8,4096,1,0,28.7921
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,4,32768,1,0,158.8594
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,16,1,1,0,0.1875
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,8,8192,1,0,64.4319
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,16,16,1,0,0.2717
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,16,32,1,0,0.4045
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,16,128,1,0,1.3528
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,16,64,1,0,0.7001
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,8,10240,1,0,82.4533
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,16,256,1,0,2.6025
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,16,512,1,0,5.4651
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,8,12288,1,0,106.3573
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,16,1536,1,0,18.5485
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,16,1024,1,0,11.6818
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,16,2048,1,0,25.6705
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,32,1,1,0,0.1935
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,16,3072,1,0,41.9794
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,8,16384,1,0,145.5829
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,32,16,1,0,0.4090
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,32,32,1,0,0.7016
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,16,4096,1,0,59.2579
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,32,64,1,0,1.3625
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,32,512,1,0,10.9540
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,32,256,1,0,5.4242
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,32,128,1,0,2.6191
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,16,6144,1,0,94.1761
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,32,1024,1,0,23.5967
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,64,1,1,0,0.2111
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,64,16,1,0,0.7068
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,16,8192,1,0,136.7502
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,32,1536,1,0,37.6818
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,32,2048,1,0,50.7726
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,64,32,1,0,1.3569
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,64,64,1,0,2.5949
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,64,128,1,0,5.3819
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,64,256,1,0,10.9612
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,32,3072,1,0,87.8866
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,128,1,1,0,0.2516
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,64,512,1,0,23.0905
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,128,16,1,0,1.3771
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,64,1024,1,0,47.9765
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,128,32,1,0,2.6780
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,32,4096,1,0,128.4124
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,128,64,1,0,5.3513
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,64,1536,1,0,75.6643
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,128,128,1,0,11.0183
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,128,256,1,0,22.0304
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,256,1,1,0,0.3205
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,256,16,1,0,2.7019
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,64,2048,1,0,102.4562
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,256,32,1,0,5.5520
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,128,512,1,0,46.0695
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,256,64,1,0,10.7571
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,256,128,1,0,22.4407
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1,1,0,0.1455
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,16,1,0,0.1339
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,32,1,0,0.1397
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,64,1,0,0.1710
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,128,1024,1,0,99.6100
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,128,1,0,0.1612
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,256,256,1,0,45.6442
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,256,1,0,0.2089
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,512,1,0,0.3228
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1536,1,0,1.0305
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1024,1,0,0.6283
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,2048,1,0,1.5434
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,3072,1,0,2.9737
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,256,512,1,0,95.9929
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,6144,1,0,6.2787
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,4096,1,0,4.1770
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,10240,1,0,10.9583
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,8192,1,0,8.5688
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1,1,0,0.1544
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,12288,1,0,13.3678
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,16,1,0,0.1388
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,32,1,0,0.1507
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,64,1,0,0.1600
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,16384,1,0,18.2901
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,256,1,0,0.3046
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,512,1,0,0.5562
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,128,1,0,0.1993
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1536,1,0,1.9955
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,32768,1,0,40.5171
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,3072,1,0,5.7170
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1024,1,0,1.2034
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,4096,1,0,8.0197
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,8192,1,0,17.2040
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,2048,1,0,3.0112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,6144,1,0,12.3554
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,12288,1,0,26.5383
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1,1,0,0.1340
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,10240,1,0,21.4744
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,32,1,0,0.1601
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,64,1,0,0.1994
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,16,1,0,0.1495
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,16384,1,0,36.5876
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,128,1,0,0.2849
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,256,1,0,0.5144
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,512,1,0,1.0634
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1024,1,0,2.3559
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,32768,1,0,82.8866
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,2048,1,0,5.8839
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,3072,1,0,11.0784
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,4096,1,0,15.6616
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1536,1,0,3.9619
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,6144,1,0,24.3443
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,8192,1,0,33.8251
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1,1,0,0.1317
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,16,1,0,0.1620
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,32,1,0,0.1993
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,64,1,0,0.2832
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,12288,1,0,53.6926
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,10240,1,0,43.7620
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,128,1,0,0.4752
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,16384,1,0,75.5487
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,256,1,0,0.9709
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,512,1,0,2.0538
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1024,1,0,4.5382
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1536,1,0,7.7413
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,3072,1,0,21.5593
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,2048,1,0,11.7851
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,4096,1,0,30.5221
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,6144,1,0,49.9717
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1,1,0,0.1357
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,32768,1,0,166.8371
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,16,1,0,0.1993
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,8192,1,0,69.7704
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,32,1,0,0.2836
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,64,1,0,0.4769
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,256,1,0,1.8505
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,128,1,0,0.8972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,10240,1,0,88.6147
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,512,1,0,3.9849
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1024,1,0,9.0765
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,12288,1,0,110.5515
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1536,1,0,15.9293
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,2048,1,0,23.7158
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1,1,0,0.1412
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,16,1,0,0.2874
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,3072,1,0,43.5219
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,32,1,0,0.4752
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,16384,1,0,154.0978
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,64,1,0,0.8964
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,4096,1,0,62.7371
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,128,1,0,1.7255
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,256,1,0,3.6082
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,512,1,0,8.0435
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1024,1,0,18.2731
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,6144,1,0,100.5394
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1536,1,0,32.0375
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1,1,0,0.1556
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,16,1,0,0.4797
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,32,1,0,0.8979
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,2048,1,0,49.5107
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,8192,1,0,141.6210
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,64,1,0,1.7107
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,128,1,0,3.3830
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,256,1,0,7.2893
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,512,1,0,16.0720
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,3072,1,0,90.2396
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,1,1,0,0.1739
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,16,1,0,0.9096
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1024,1,0,38.1194
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,32,1,0,1.7082
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,64,1,0,3.4220
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,4096,1,0,129.2402
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,128,1,0,6.7357
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1536,1,0,67.1796
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,256,1,0,14.9071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,1,1,0,0.2243
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,16,1,0,1.7366
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,32,1,0,3.3719
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,512,1,0,33.6266
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,2048,1,0,99.7549
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,64,1,0,6.9283
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,128,1,0,13.7055
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1,1,0,0.0986
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,16,1,0,0.1114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,64,1,0,0.1175
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,256,1,0,30.3357
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,1024,1,0,79.5317
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,32,1,0,0.1129
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,128,1,0,0.1307
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,256,1,0,0.1617
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,512,1,0,0.2313
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1024,1,0,0.4157
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1536,1,0,0.6398
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,4096,1,0,2.7189
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,512,1,0,70.8636
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,3072,1,0,1.9306
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,2048,1,0,0.9307
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,8192,1,0,5.2232
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,10240,1,0,6.7192
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,6144,1,0,3.8897
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,16384,1,0,11.3831
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1,1,0,0.1091
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,12288,1,0,8.3087
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,32,1,0,0.1179
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,32768,1,0,25.7700
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,128,1,0,0.1562
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,16,1,0,0.1135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,64,1,0,0.1291
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,256,1,0,0.2216
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1024,1,0,0.7605
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1536,1,0,1.2186
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,512,1,0,0.3795
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,4096,1,0,4.9922
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,6144,1,0,7.5504
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,3072,1,0,3.6313
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,8192,1,0,10.4041
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,12288,1,0,16.2641
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,2048,1,0,1.7868
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,10240,1,0,13.1359
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1,1,0,0.1092
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,32,1,0,0.1317
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,16,1,0,0.1193
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,128,1,0,0.2104
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,512,1,0,0.6933
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,256,1,0,0.3553
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,16384,1,0,22.6149
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1536,1,0,2.3712
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,32768,1,0,51.9493
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1024,1,0,1.4592
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,64,1,0,0.1541
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,3072,1,0,6.9181
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,2048,1,0,3.4393
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,6144,1,0,14.7568
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,10240,1,0,26.3722
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1,1,0,0.1097
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,8192,1,0,20.4533
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,12288,1,0,32.6661
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,16,1,0,0.1282
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,4096,1,0,9.5034
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,16384,1,0,45.7493
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,32,1,0,0.1554
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,64,1,0,0.2098
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,128,1,0,0.3391
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,512,1,0,1.3236
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1024,1,0,2.7624
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,256,1,0,0.6414
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,3072,1,0,13.0814
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1536,1,0,4.5963
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,4096,1,0,18.3397
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,2048,1,0,6.7866
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,32768,1,0,108.5648
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,6144,1,0,29.7888
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1,1,0,0.1125
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,32,1,0,0.2117
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,16,1,0,0.1558
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,8192,1,0,41.4346
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,64,1,0,0.3396
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,10240,1,0,53.7499
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,128,1,0,0.6006
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,256,1,0,1.2195
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,512,1,0,2.5164
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,12288,1,0,67.9998
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1536,1,0,9.4778
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,2048,1,0,13.8969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1024,1,0,5.4545
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,16384,1,0,94.6702
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,3072,1,0,26.2232
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1,1,0,0.1141
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,16,1,0,0.2138
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,32,1,0,0.3391
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,4096,1,0,37.3242
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,64,1,0,0.5996
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,128,1,0,1.1337
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,256,1,0,2.2991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,512,1,0,4.8963
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,6144,1,0,60.2239
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1024,1,0,11.2804
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,8192,1,0,85.8366
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1536,1,0,19.0740
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1,1,0,0.1227
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,16,1,0,0.3405
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,2048,1,0,28.2056
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,32,1,0,0.6025
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,64,1,0,1.1410
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,128,1,0,2.1335
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,3072,1,0,53.4742
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,256,1,0,4.5152
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,512,1,0,9.9905
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,1,1,0,0.1356
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,4096,1,0,78.1723
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,32,1,0,1.1428
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1024,1,0,22.5246
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,16,1,0,0.6051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1536,1,0,39.9207
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,64,1,0,2.1151
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,128,1,0,4.2882
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,256,1,0,9.4562
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,2048,1,0,60.0650
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,16,1,0,1.1451
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,32,1,0,2.1492
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,1,1,0,0.1688
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,512,1,0,20.2106
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,64,1,0,4.3160
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,1024,1,0,47.0492
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,128,1,0,8.8743
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1,1,0,0.0885
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,16,1,0,0.0974
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,256,1,0,19.4343
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,64,1,0,0.1040
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,32,1,0,0.1009
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,256,1,0,0.1323
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,512,1,0,41.5555
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,512,1,0,0.1849
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,128,1,0,0.1103
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1536,1,0,0.4448
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,2048,1,0,0.6305
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,3072,1,0,1.3903
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,8192,1,0,3.5391
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,4096,1,0,1.8883
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1024,1,0,0.3049
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,6144,1,0,2.6494
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,10240,1,0,4.5209
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,16384,1,0,7.8608
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,12288,1,0,5.6777
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1,1,0,0.0977
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,32,1,0,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,16,1,0,0.0982
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,64,1,0,0.1111
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,32768,1,0,18.6589
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,128,1,0,0.1305
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,256,1,0,0.1774
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1536,1,0,0.8135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1024,1,0,0.5408
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,3072,1,0,2.5291
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,512,1,0,0.2817
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,2048,1,0,1.1773
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,6144,1,0,5.0365
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,4096,1,0,3.4658
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,12288,1,0,11.0152
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1,1,0,0.0965
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,16384,1,0,15.4612
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,16,1,0,0.1034
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,32,1,0,0.1112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,8192,1,0,7.0396
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,64,1,0,0.1299
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,32768,1,0,37.7301
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,128,1,0,0.1706
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,10240,1,0,8.7827
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,512,1,0,0.4976
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,256,1,0,0.2725
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1024,1,0,1.0017
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,3072,1,0,4.7719
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1536,1,0,1.5866
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,2048,1,0,2.2177
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,6144,1,0,9.7484
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,4096,1,0,6.4908
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,12288,1,0,22.1553
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,10240,1,0,17.8603
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1,1,0,0.0976
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,8192,1,0,13.7226
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,16,1,0,0.1114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,32,1,0,0.1286
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,16384,1,0,31.4373
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,64,1,0,0.1730
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,128,1,0,0.2635
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1024,1,0,1.8451
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,512,1,0,0.9184
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1536,1,0,3.0160
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,2048,1,0,4.3433
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,256,1,0,0.4732
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,3072,1,0,8.7154
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,4096,1,0,12.2304
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,32768,1,0,77.2231
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,8192,1,0,27.9241
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1,1,0,0.0984
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,10240,1,0,35.6873
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,16,1,0,0.1295
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,6144,1,0,19.5809
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,32,1,0,0.1720
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,64,1,0,0.2608
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,12288,1,0,44.5650
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,128,1,0,0.4526
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,256,1,0,0.8714
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,16384,1,0,63.8243
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1536,1,0,6.1693
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1024,1,0,3.6029
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,2048,1,0,8.8844
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,512,1,0,1.6907
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,3072,1,0,17.6124
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,4096,1,0,25.0458
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,16,1,0,0.1739
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,32,1,0,0.2612
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1,1,0,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,64,1,0,0.4530
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,256,1,0,1.5939
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,128,1,0,0.8342
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,6144,1,0,39.9377
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,512,1,0,3.3011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,8192,1,0,56.4536
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1536,1,0,12.5547
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1024,1,0,7.4558
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,2048,1,0,18.2068
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1,1,0,0.1070
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,32,1,0,0.4553
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,128,1,0,1.5142
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,3072,1,0,35.5030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,64,1,0,0.8404
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,16,1,0,0.2640
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,256,1,0,3.0782
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,4096,1,0,50.6781
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,512,1,0,6.9049
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1024,1,0,15.5624
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,16,1,0,0.4573
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,32,1,0,0.8318
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,1,1,0,0.1136
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1536,1,0,25.5663
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,128,1,0,2.9446
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,2048,1,0,37.0353
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,64,1,0,1.5242
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,256,1,0,6.5224
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,16,1,0,0.8362
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,1,1,0,0.1373
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,512,1,0,14.3512
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,64,1,0,2.9774
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,32,1,0,1.5164
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,1024,1,0,31.0318
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,128,1,0,6.2602
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,16,1,0,0.0843
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,32,1,0,0.0869
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,128,1,0,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,512,1,0,28.4837
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,64,1,0,0.0960
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,256,1,0,0.1237
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,512,1,0,0.1687
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1536,1,0,0.4026
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1024,1,0,0.2708
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,3072,1,0,1.3298
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1,1,0,0.0787
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,4096,1,0,1.8076
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,2048,1,0,0.5703
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,6144,1,0,2.4836
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,8192,1,0,3.2949
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,12288,1,0,5.2736
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,10240,1,0,4.2362
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1,1,0,0.0825
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,32768,1,0,17.6760
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,32,1,0,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,16384,1,0,7.3231
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,16,1,0,0.0877
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,128,1,0,0.1196
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,256,1,0,0.1613
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,64,1,0,0.1008
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1024,1,0,0.4783
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1536,1,0,0.7301
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,3072,1,0,2.3961
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,2048,1,0,1.0603
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,512,1,0,0.2508
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,6144,1,0,4.7093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,8192,1,0,6.5667
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,4096,1,0,3.2373
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,12288,1,0,10.2636
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1,1,0,0.0837
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,16384,1,0,14.5356
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,32,1,0,0.1030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,16,1,0,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,32768,1,0,35.9394
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,64,1,0,0.1203
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,10240,1,0,8.2042
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,128,1,0,0.1577
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,512,1,0,0.4341
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1024,1,0,0.8819
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,256,1,0,0.2373
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1536,1,0,1.4032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,3072,1,0,4.3451
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,2048,1,0,1.9621
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,4096,1,0,5.9388
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,8192,1,0,12.7551
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,6144,1,0,9.0069
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,12288,1,0,20.7160
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1,1,0,0.0847
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,16,1,0,0.1014
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,16384,1,0,29.5248
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,10240,1,0,16.6926
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,32,1,0,0.1195
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,64,1,0,0.1565
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,128,1,0,0.2298
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1536,1,0,2.6602
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,512,1,0,0.8026
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,32768,1,0,71.3187
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1024,1,0,1.6027
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,2048,1,0,3.8343
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,4096,1,0,11.2761
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,3072,1,0,8.0236
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,6144,1,0,18.2509
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1,1,0,0.0844
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,8192,1,0,25.9867
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,16,1,0,0.1214
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,12288,1,0,41.4421
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,32,1,0,0.1570
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,10240,1,0,33.0319
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,64,1,0,0.2296
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,128,1,0,0.3983
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,16384,1,0,58.5943
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,512,1,0,1.4366
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,256,1,0,0.7565
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1536,1,0,5.4001
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,2048,1,0,7.9108
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1024,1,0,3.1407
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,4096,1,0,22.9982
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1,1,0,0.0869
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,16,1,0,0.1570
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,6144,1,0,36.6029
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,32,1,0,0.2286
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,64,1,0,0.3949
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,3072,1,0,16.2369
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,128,1,0,0.7163
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,256,1,0,1.3443
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,8192,1,0,51.9440
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,512,1,0,2.7899
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1024,1,0,6.5076
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,2048,1,0,16.4365
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,16,1,0,0.2289
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1536,1,0,10.9154
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1,1,0,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,3072,1,0,32.1319
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,32,1,0,0.3959
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,64,1,0,0.7250
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,4096,1,0,45.6485
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,128,1,0,1.2741
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,256,1,0,2.6151
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,512,1,0,5.8187
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1024,1,0,13.3784
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,1,1,0,0.1028
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,16,1,0,0.3952
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1536,1,0,22.4254
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,256,1,0,0.4152
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,64,1,0,1.2793
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,32,1,0,0.7186
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,128,1,0,2.4649
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,2048,1,0,32.5650
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,512,1,0,12.0092
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,256,1,0,5.4222
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,32,1,0,1.2788
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,64,1,0,2.4635
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,1024,1,0,26.6546
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,1,1,0,0.1228
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,16,1,0,0.7190
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,256,1,0,11.3124
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,128,1,0,5.2393
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1,16,1,0,0.0825
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1,32,1,0,0.0832
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,512,1,0,23.8855
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1,128,1,0,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1,64,1,0,0.0887
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1,1,1,0,0.0699
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1,512,1,0,0.1611
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1,256,1,0,0.1149
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1,1536,1,0,0.3837
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1,1024,1,0,0.2579
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1,6144,1,0,2.4158
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1,8192,1,0,3.1915
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1,3072,1,0,1.2538
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1,12288,1,0,5.1107
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1,2048,1,0,0.5427
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1,16384,1,0,7.1376
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,2,1,1,0,0.0798
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1,4096,1,0,1.7552
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1,32768,1,0,17.2437
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,2,32,1,0,0.0906
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1,10240,1,0,4.1121
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,2,128,1,0,0.1153
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,2,16,1,0,0.0844
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,2,512,1,0,0.2350
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,2,1024,1,0,0.4470
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,2,256,1,0,0.1542
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,2,3072,1,0,2.2817
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,2,64,1,0,0.0968
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,2,1536,1,0,0.6930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,2,4096,1,0,3.1218
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,2,8192,1,0,6.3494
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,2,2048,1,0,1.0071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,2,10240,1,0,7.9399
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,2,6144,1,0,4.5511
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,4,16,1,0,0.0885
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,2,16384,1,0,14.0714
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,2,12288,1,0,9.9436
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,4,1,1,0,0.0811
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,4,32,1,0,0.0949
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,4,128,1,0,0.1480
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,4,256,1,0,0.2244
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,2,32768,1,0,34.8350
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,4,1024,1,0,0.8271
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,4,64,1,0,0.1145
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,4,2048,1,0,1.8569
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,4,512,1,0,0.4063
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,4,1536,1,0,1.3112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,4,6144,1,0,8.7260
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,4,10240,1,0,16.1371
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,4,12288,1,0,19.9218
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,4,4096,1,0,5.7512
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,4,8192,1,0,12.3137
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,8,1,1,0,0.0824
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,4,16384,1,0,28.6143
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,4,3072,1,0,4.2494
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,8,16,1,0,0.0953
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,8,32,1,0,0.1128
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,8,128,1,0,0.2141
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,8,64,1,0,0.1497
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,8,512,1,0,0.7429
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,8,1536,1,0,2.5055
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,8,256,1,0,0.3853
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,4,32768,1,0,69.3007
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,8,3072,1,0,7.6422
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,8,4096,1,0,10.8534
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,8,2048,1,0,3.6257
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,8,6144,1,0,17.5153
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,8,1024,1,0,1.5052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,16,1,1,0,0.0817
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,8,10240,1,0,31.9094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,16,16,1,0,0.1168
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,8,8192,1,0,25.2123
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,16,32,1,0,0.1479
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,8,12288,1,0,39.9069
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,16,64,1,0,0.2135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,16,512,1,0,1.3450
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,16,256,1,0,0.7042
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,16,128,1,0,0.3680
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,16,2048,1,0,7.4746
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,8,16384,1,0,56.8072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,16,1024,1,0,2.9177
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,16,3072,1,0,15.4427
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,16,1536,1,0,5.0723
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,32,1,1,0,0.0845
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,16,4096,1,0,22.1995
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,32,16,1,0,0.1497
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,32,64,1,0,0.3668
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,16,6144,1,0,35.3171
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,32,32,1,0,0.2127
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,32,256,1,0,1.2370
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,32,1024,1,0,6.0310
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,32,512,1,0,2.6101
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,32,1536,1,0,10.2532
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,16,8192,1,0,50.1683
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,32,128,1,0,0.6647
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,64,16,1,0,0.2139
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,64,32,1,0,0.3666
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,64,1,1,0,0.0885
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,64,128,1,0,1.1859
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,32,4096,1,0,43.9036
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,64,64,1,0,0.6636
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,32,3072,1,0,30.8506
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,64,256,1,0,2.4188
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,64,512,1,0,5.3595
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,64,1024,1,0,12.4326
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,128,32,1,0,0.6720
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,64,1536,1,0,20.9071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,128,1,1,0,0.0948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,128,64,1,0,1.1719
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,128,128,1,0,2.2540
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,64,2048,1,0,30.7377
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,128,16,1,0,0.3669
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,128,256,1,0,5.0705
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,256,1,1,0,0.1145
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,128,512,1,0,11.0803
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,256,16,1,0,0.6659
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,256,32,1,0,1.1837
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,256,256,1,0,10.3414
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,256,128,1,0,4.8418
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,128,1024,1,0,24.7507
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1,1,1,0,0.0681
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,256,512,1,0,21.9600
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1,16,1,0,0.0784
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1,32,1,0,0.0804
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1,128,1,0,0.0922
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1,64,1,0,0.0872
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1,256,1,0,0.1152
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1,512,1,0,0.1590
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1,1536,1,0,0.3745
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1,2048,1,0,0.5291
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1,3072,1,0,1.2516
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1,6144,1,0,2.3593
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1,4096,1,0,1.7284
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1,10240,1,0,4.0404
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1,12288,1,0,5.0805
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1,8192,1,0,3.1220
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1,16384,1,0,7.0362
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,2,1,1,0,0.0770
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1,32768,1,0,16.9882
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,2,16,1,0,0.0811
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,2,32,1,0,0.0866
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,2,64,1,0,0.0927
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,2,128,1,0,0.1088
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,2,256,1,0,0.1506
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,2,512,1,0,0.2295
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,2,1024,1,0,0.4346
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,2,1536,1,0,0.6712
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,2,2048,1,0,0.9795
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,2,3072,1,0,2.2439
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,2,6144,1,0,4.4815
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,2,8192,1,0,6.2346
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,2,10240,1,0,7.7992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,4,1,1,0,0.0766
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,2,16384,1,0,13.8557
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,4,16,1,0,0.0883
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,4,32,1,0,0.0920
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,2,32768,1,0,34.4866
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,4,64,1,0,0.1130
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,4,128,1,0,0.1469
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,4,256,1,0,0.2176
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,4,512,1,0,0.3954
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,4,1536,1,0,1.2732
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,4,1024,1,0,0.8028
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,4,2048,1,0,1.8037
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,4,3072,1,0,4.1297
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,4,4096,1,0,5.6755
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,4,6144,1,0,8.6132
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,4,8192,1,0,12.1173
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,4,12288,1,0,19.6304
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,8,1,1,0,0.0768
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,4,16384,1,0,28.1357
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,8,16,1,0,0.0930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,8,32,1,0,0.1087
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,8,64,1,0,0.1479
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,4,32768,1,0,68.8202
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,8,512,1,0,0.7148
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,8,256,1,0,0.3730
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,8,1024,1,0,1.4379
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,8,1536,1,0,2.4271
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,8,2048,1,0,3.5262
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,8,4096,1,0,10.6437
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,8,6144,1,0,17.3351
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,8,8192,1,0,24.6482
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,2,4096,1,0,3.1246
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,16,1,1,0,0.0801
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,8,12288,1,0,39.5216
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,16,32,1,0,0.1445
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,8,16384,1,0,55.9032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,16,64,1,0,0.2076
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,16,128,1,0,0.3559
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,16,512,1,0,1.2955
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,16,256,1,0,0.6721
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,16,1024,1,0,2.8213
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,16,1536,1,0,4.9628
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,16,2048,1,0,7.2839
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,4,10240,1,0,15.8476
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,16,3072,1,0,15.1861
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,16,4096,1,0,21.8133
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,32,1,1,0,0.0807
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,16,6144,1,0,34.4318
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,8,128,1,0,0.2060
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,32,16,1,0,0.1462
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,32,32,1,0,0.2053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,32,64,1,0,0.3538
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,16,8192,1,0,49.1039
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,32,128,1,0,0.6443
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,32,256,1,0,1.2076
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,32,512,1,0,2.4909
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,32,1024,1,0,5.8311
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,32,1536,1,0,9.9846
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,32,2048,1,0,15.0921
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,64,1,1,0,0.0875
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,32,3072,1,0,30.3699
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,64,16,1,0,0.2064
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,64,64,1,0,0.6432
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,32,4096,1,0,43.1436
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,64,128,1,0,1.1236
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,64,256,1,0,2.3212
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,64,512,1,0,5.1836
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,16,16,1,0,0.1103
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,64,1024,1,0,12.1703
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,128,1,1,0,0.0929
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,128,16,1,0,0.3573
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,128,32,1,0,0.6383
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,128,128,1,0,2.1604
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,128,64,1,0,1.1327
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,64,2048,1,0,29.7784
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,128,512,1,0,10.8298
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,128,256,1,0,4.8085
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,256,1,1,0,0.1120
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,256,16,1,0,0.6459
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,256,32,1,0,1.1295
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,128,1024,1,0,23.7651
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,256,128,1,0,4.5476
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,256,64,1,0,2.1889
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,256,256,1,0,10.0848
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1,16,1,0,0.0766
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1,32,1,0,0.0785
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1,64,1,0,0.0847
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1,1,1,0,0.0670
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1,128,1,0,0.0907
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1,256,1,0,0.1104
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,256,512,1,0,21.1616
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1,512,1,0,0.1538
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1,1024,1,0,0.2479
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1,1536,1,0,0.3703
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1,2048,1,0,0.5268
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1,3072,1,0,1.2517
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1,4096,1,0,1.7057
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1,6144,1,0,2.3748
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1,10240,1,0,4.0521
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1,12288,1,0,5.0257
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,2,1,1,0,0.0763
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1,32768,1,0,16.8907
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,2,16,1,0,0.0787
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,2,32,1,0,0.0855
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,2,64,1,0,0.0922
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,64,32,1,0,0.3540
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,2,256,1,0,0.1480
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,2,512,1,0,0.2265
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,2,128,1,0,0.1079
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,2,1024,1,0,0.4360
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,2,1536,1,0,0.6648
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,2,2048,1,0,0.9665
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,2,4096,1,0,3.0427
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,2,3072,1,0,2.2533
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,64,1536,1,0,20.2873
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,2,6144,1,0,4.4271
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,2,10240,1,0,7.7685
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,2,12288,1,0,9.7404
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,4,1,1,0,0.0765
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,2,16384,1,0,13.7474
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,4,16,1,0,0.0862
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,4,32,1,0,0.0912
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,2,32768,1,0,34.1550
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,4,128,1,0,0.1435
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,4,64,1,0,0.1077
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,4,256,1,0,0.2153
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,4,512,1,0,0.3929
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,4,1536,1,0,1.2592
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,4,2048,1,0,1.7914
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,4,3072,1,0,4.1179
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,4,1024,1,0,0.7869
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,4,4096,1,0,5.6255
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,4,8192,1,0,12.0446
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,4,10240,1,0,15.6964
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,4,12288,1,0,19.4551
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,4,16384,1,0,27.8614
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,8,1,1,0,0.0768
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,8,16,1,0,0.0910
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,8,64,1,0,0.1438
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,4,32768,1,0,68.1411
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,8,256,1,0,0.3697
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,8,512,1,0,0.7033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,8,1024,1,0,1.4298
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,8,1536,1,0,2.3714
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,8,4096,1,0,10.6071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,8,6144,1,0,17.0750
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,8,8192,1,0,24.5273
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,8,10240,1,0,31.0446
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,16,1,1,0,0.0784
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,8,12288,1,0,39.0443
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,16,16,1,0,0.1099
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,16,32,1,0,0.1423
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,16,64,1,0,0.2056
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,16,128,1,0,0.3537
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,16,256,1,0,0.6618
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,16,1024,1,0,2.7728
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,16,1536,1,0,4.8366
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,8,128,1,0,0.2062
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,16,2048,1,0,7.2285
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,16,3072,1,0,14.9522
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,8,2048,1,0,3.5032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,16,4096,1,0,21.6623
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,16,6144,1,0,34.2909
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,32,16,1,0,0.1453
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,32,32,1,0,0.2037
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,16,8192,1,0,48.5781
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,32,64,1,0,0.3520
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,32,256,1,0,1.1819
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,32,512,1,0,2.4518
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,32,1024,1,0,5.7497
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,32,1536,1,0,9.8803
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,32,2048,1,0,14.9223
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,64,1,1,0,0.0867
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,32,3072,1,0,30.0915
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,64,16,1,0,0.2061
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,64,32,1,0,0.3519
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,32,4096,1,0,42.6249
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,64,64,1,0,0.6340
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,8,16384,1,0,55.5894
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,64,128,1,0,1.1151
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,64,256,1,0,2.2633
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,64,512,1,0,5.0730
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,64,1024,1,0,11.9876
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,128,1,1,0,0.0917
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,128,16,1,0,0.3523
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,16,512,1,0,1.2536
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,64,2048,1,0,29.5560
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,128,32,1,0,0.6324
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,128,64,1,0,1.1034
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,128,128,1,0,2.1182
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,128,256,1,0,4.6914
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,256,1,1,0,0.1087
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,256,16,1,0,0.6285
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,128,1024,1,0,23.6913
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,256,32,1,0,1.0990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,256,64,1,0,2.1273
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,256,128,1,0,4.4473
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,32,1,1,0,0.0804
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,256,512,1,0,20.6817
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,16,1,0,0.1897
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,1,1,0,0.2228
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,32,1,0,0.1801
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,64,1,0,0.1859
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,128,1,0,0.2116
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,1536,1,0,1.2014
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,2048,1,0,1.7020
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,4096,1,0,4.3250
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,3072,1,0,3.1378
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,256,1,0,0.2735
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,6144,1,0,6.6195
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,8192,1,0,9.3518
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,10240,1,0,11.5349
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,1,1,0,0.2346
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,12288,1,0,14.3819
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,16,1,0,0.1822
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,16384,1,0,20.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,32,1,0,0.1862
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,32768,1,0,45.9178
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,512,1,0,0.4096
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,64,1,0,0.2103
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,128,1,0,0.2700
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,256,1,0,0.3951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,1024,1,0,1.4653
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,1536,1,0,2.3105
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,512,1,0,0.7177
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,3072,1,0,6.3576
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,1024,1,0,0.7794
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,2048,1,0,3.3158
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,4096,1,0,8.6295
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,6144,1,0,13.3605
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,10240,1,0,24.0444
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,8192,1,0,18.2486
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,1,1,0,0.1922
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,12288,1,0,28.3911
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,16,1,0,0.1861
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,32,1,0,0.2114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,64,1,0,0.2696
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,128,1,0,0.3858
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,16384,1,0,40.1888
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,256,1,0,0.6938
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,512,1,0,1.3515
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,1024,1,0,2.8149
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,1536,1,0,4.7908
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,2048,1,0,7.0175
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,3072,1,0,12.5449
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,32768,1,0,93.8855
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,4096,1,0,17.4390
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,6144,1,0,27.5174
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,1,1,0,0.1873
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,8192,1,0,37.3822
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,10240,1,0,48.0950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,16,1,0,0.2136
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,64,1,0,0.3857
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,12288,1,0,59.7498
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,32,1,0,0.2706
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,128,1,0,0.6752
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,16384,1,0,82.7946
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,512,1,0,2.5834
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,256,1,0,1.3008
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,1536,1,0,9.6392
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,1024,1,0,5.8199
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,3072,1,0,24.3952
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,2048,1,0,14.0407
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,4096,1,0,35.3355
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,1,1,0,0.1919
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,6144,1,0,55.8909
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,16,1,0,0.2713
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,32768,1,0,192.7254
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,32,1,0,0.3885
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,8192,1,0,78.9222
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,64,1,0,0.6827
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,128,1,0,1.2757
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,10240,1,0,99.4713
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,512,1,0,5.3971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,256,1,0,2.4523
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,1024,1,0,11.8829
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,12288,1,0,119.2977
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,1536,1,0,20.1732
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,2048,1,0,28.6420
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,1,1,0,0.1875
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,16,1,0,0.3881
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,16384,1,0,170.7764
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,3072,1,0,50.0679
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,32,1,0,0.6829
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,64,1,0,1.2769
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,128,1,0,2.4230
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,4096,1,0,74.1929
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,256,1,0,5.0292
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,512,1,0,10.8360
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,6144,1,0,118.9389
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,1024,1,0,24.1375
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,1,1,0,0.1998
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,1536,1,0,40.3103
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,16,1,0,0.6891
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,32,1,0,1.2704
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,8192,1,0,159.3221
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,2048,1,0,60.3590
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,64,1,0,2.4960
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,128,1,0,4.8265
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,256,1,0,10.3788
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,512,1,0,22.4442
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,128,1,1,0,0.2380
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,3072,1,0,102.1762
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,128,16,1,0,1.2874
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,1024,1,0,51.3005
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,128,32,1,0,2.3995
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,4096,1,0,147.1507
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,128,64,1,0,4.9751
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,128,128,1,0,10.2550
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,1536,1,0,82.3708
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,128,256,1,0,21.4732
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,256,16,1,0,2.4266
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,256,1,1,0,0.3211
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,256,32,1,0,5.0322
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,128,512,1,0,46.7583
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,2048,1,0,126.3651
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,256,64,1,0,10.4246
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,256,128,1,0,20.2742
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,256,256,1,0,42.4667
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,128,1024,1,0,102.7104
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,256,512,1,0,100.1988
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,64,1,0,0.1465
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,16,1,0,0.1499
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,32,1,0,0.1422
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,128,1,0,0.1713
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1,1,0,0.1789
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,256,1,0,0.2409
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,512,1,0,0.3994
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,4096,1,0,6.7040
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1024,1,0,0.8964
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,2048,1,0,2.4578
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1536,1,0,1.5948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,3072,1,0,4.7298
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,6144,1,0,10.3079
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,16384,1,0,30.5455
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1,1,0,0.1858
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,16,1,0,0.1425
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,8192,1,0,14.3117
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,32,1,0,0.1463
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,10240,1,0,18.4547
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,64,1,0,0.1743
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,12288,1,0,22.4784
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,32768,1,0,65.5933
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,128,1,0,0.2244
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,256,1,0,0.3509
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,512,1,0,0.7084
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1024,1,0,1.7375
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,2048,1,0,4.8626
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1536,1,0,3.1207
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,4096,1,0,13.2548
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,6144,1,0,20.8327
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,8192,1,0,28.8805
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,3072,1,0,9.2867
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1,1,0,0.1816
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,12288,1,0,45.0458
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,16,1,0,0.1459
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,32,1,0,0.1709
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,10240,1,0,36.7082
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,16384,1,0,61.4243
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,128,1,0,0.3262
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,256,1,0,0.6157
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,64,1,0,0.2265
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,512,1,0,1.3849
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1024,1,0,3.3910
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1536,1,0,6.2258
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,2048,1,0,9.7638
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,3072,1,0,18.5566
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,32768,1,0,132.6748
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,4096,1,0,26.3730
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,6144,1,0,41.9477
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1,1,0,0.1483
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,16,1,0,0.1718
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,8192,1,0,58.4479
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,32,1,0,0.2258
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,10240,1,0,74.2732
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,128,1,0,0.5637
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,64,1,0,0.3265
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,512,1,0,2.6987
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,12288,1,0,91.1498
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,256,1,0,1.1859
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1024,1,0,6.8153
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,2048,1,0,19.7942
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1536,1,0,12.6195
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,16384,1,0,124.8610
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,3072,1,0,36.4356
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,4096,1,0,52.7560
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1,1,0,0.1515
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,16,1,0,0.2253
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,6144,1,0,84.6023
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,32,1,0,0.3302
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,64,1,0,0.5639
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,128,1,0,1.0947
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,8192,1,0,117.6988
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,256,1,0,2.2534
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,32768,1,0,270.1951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,512,1,0,5.4603
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1024,1,0,13.7209
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1536,1,0,25.4418
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,10240,1,0,150.2434
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,12288,1,0,184.3718
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,2048,1,0,40.0172
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1,1,0,0.1473
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,16,1,0,0.3280
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,3072,1,0,73.6548
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,32,1,0,0.5685
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,64,1,0,1.0863
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,16384,1,0,251.8551
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,128,1,0,2.0688
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,4096,1,0,106.4595
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,512,1,0,11.1023
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,256,1,0,4.4560
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1024,1,0,28.0156
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1,1,0,0.1525
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,16,1,0,0.5739
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,6144,1,0,170.1089
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1536,1,0,51.3785
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,32,1,0,1.0923
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,64,1,0,2.0718
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,128,1,0,4.0812
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,2048,1,0,80.7440
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,256,1,0,9.0319
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,512,1,0,22.5014
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,1,1,0,0.1830
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,8192,1,0,236.3948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,16,1,0,1.0955
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1024,1,0,56.5911
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,3072,1,0,148.9014
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,32,1,0,2.0732
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,64,1,0,4.0885
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,128,1,0,8.2272
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,1,1,0,0.2520
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1536,1,0,103.5212
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,4096,1,0,214.1950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,16,1,0,2.0842
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,256,1,0,18.4941
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,512,1,0,46.2424
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,32,1,0,4.0911
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,64,1,0,8.2365
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,2048,1,0,162.8761
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,128,1,0,16.8646
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,256,1,0,36.9787
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,1024,1,0,113.1277
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,512,1,0,92.4687
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,16,1,0,0.1301
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,32,1,0,0.1235
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,64,1,0,0.1252
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,128,1,0,0.1419
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1,1,0,0.1465
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,256,1,0,0.1950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,512,1,0,0.2972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,4096,1,0,4.1992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1024,1,0,0.5851
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1536,1,0,0.9739
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,2048,1,0,1.4776
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,3072,1,0,2.9337
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1,1,0,0.1483
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,6144,1,0,6.2794
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,16,1,0,0.1234
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,8192,1,0,8.5641
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,10240,1,0,11.0213
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,32768,1,0,40.4367
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,32,1,0,0.1237
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,64,1,0,0.1425
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,12288,1,0,13.5616
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,128,1,0,0.1829
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,256,1,0,0.2752
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,512,1,0,0.4952
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1536,1,0,1.9273
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,16384,1,0,18.4435
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,3072,1,0,5.7175
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,2048,1,0,2.9422
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1024,1,0,1.0984
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,6144,1,0,12.5073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,4096,1,0,8.0304
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,12288,1,0,26.7386
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,10240,1,0,21.9069
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,32,1,0,0.1421
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1,1,0,0.1423
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,8192,1,0,17.3170
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,16384,1,0,37.1884
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,16,1,0,0.1237
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,64,1,0,0.1855
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,128,1,0,0.2606
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1024,1,0,2.2102
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,256,1,0,0.4496
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,2048,1,0,5.8838
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1536,1,0,3.8469
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,512,1,0,0.9237
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,3072,1,0,11.2462
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,32768,1,0,81.8157
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,4096,1,0,15.9109
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,6144,1,0,24.8172
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1,1,0,0.1508
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,8192,1,0,34.2988
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,10240,1,0,44.4200
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,16,1,0,0.1427
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,32,1,0,0.1834
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,64,1,0,0.2601
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,12288,1,0,54.5487
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,512,1,0,1.8366
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,16384,1,0,75.0542
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,128,1,0,0.4240
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,256,1,0,0.8354
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1024,1,0,4.3602
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,2048,1,0,11.9062
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1536,1,0,7.7542
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,3072,1,0,21.9649
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,4096,1,0,31.2489
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1,1,0,0.1318
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,6144,1,0,50.3752
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,32768,1,0,163.5277
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,8192,1,0,69.8560
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,16,1,0,0.1869
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,64,1,0,0.4227
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,32,1,0,0.2612
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,128,1,0,0.7773
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,10240,1,0,88.8028
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,256,1,0,1.6397
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1024,1,0,8.8648
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,512,1,0,3.6651
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1536,1,0,15.7121
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,12288,1,0,109.7143
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,2048,1,0,24.0252
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,16384,1,0,150.8708
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1,1,0,0.1237
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,16,1,0,0.2609
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,3072,1,0,44.2081
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,32,1,0,0.4231
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,64,1,0,0.7766
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,4096,1,0,63.2037
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,256,1,0,3.2151
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,128,1,0,1.5517
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,512,1,0,7.4153
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1024,1,0,17.9632
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,6144,1,0,101.4336
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1536,1,0,31.8954
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,16,1,0,0.4242
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1,1,0,0.1279
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,32,1,0,0.7799
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,8192,1,0,140.6687
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,64,1,0,1.5465
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,2048,1,0,48.7728
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,128,1,0,3.0207
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,256,1,0,6.4994
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,3072,1,0,88.7854
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,512,1,0,15.3334
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,1,1,0,0.1471
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,32,1,0,1.5464
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1024,1,0,36.2867
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,16,1,0,0.7798
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,4096,1,0,126.7315
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,64,1,0,3.0322
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,128,1,0,6.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1536,1,0,64.3212
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,256,1,0,13.1527
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,16,1,0,1.5655
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,1,1,0,0.1988
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,2048,1,0,97.7665
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,512,1,0,30.9295
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,32,1,0,3.0281
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,64,1,0,6.1017
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,128,1,0,12.4013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,256,1,0,26.8336
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,1024,1,0,72.7144
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,512,1,0,62.0163
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,16,1,0,0.1242
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,64,1,0,0.1072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,32,1,0,0.1048
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,128,1,0,0.1198
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,1,1,0,0.1173
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,256,1,0,0.1623
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,512,1,0,0.2447
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,4096,1,0,2.7588
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,1024,1,0,0.4499
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,1536,1,0,0.6939
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,2048,1,0,0.9958
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,3072,1,0,2.0153
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,6144,1,0,4.2519
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,8192,1,0,5.7351
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,1,1,0,0.1218
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,10240,1,0,7.4381
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,16,1,0,0.1048
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,12288,1,0,9.1497
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,32,1,0,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,32768,1,0,28.0933
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,64,1,0,0.1156
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,128,1,0,0.1594
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,512,1,0,0.3963
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,16384,1,0,12.5381
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,256,1,0,0.2351
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,1024,1,0,0.8169
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,2048,1,0,1.9115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,1536,1,0,1.3010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,4096,1,0,5.5061
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,8192,1,0,11.5268
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,6144,1,0,8.3582
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,3072,1,0,3.9451
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,16384,1,0,25.0704
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,16,1,0,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,12288,1,0,18.0030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,10240,1,0,14.6043
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,32,1,0,0.1172
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,64,1,0,0.1570
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,1,1,0,0.1231
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,128,1,0,0.2257
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,32768,1,0,56.5382
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,256,1,0,0.3769
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,1536,1,0,2.6808
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,1024,1,0,1.5406
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,512,1,0,0.7110
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,2048,1,0,3.9407
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,3072,1,0,7.6785
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,8192,1,0,22.9857
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,6144,1,0,16.4101
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,10240,1,0,29.4475
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,1,1,0,0.1215
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,4096,1,0,10.6624
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,16,1,0,0.1153
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,32,1,0,0.1586
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,12288,1,0,36.1331
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,16384,1,0,50.1652
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,64,1,0,0.2239
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,128,1,0,0.3579
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,512,1,0,1.3610
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,1536,1,0,5.3658
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,1024,1,0,3.1858
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,256,1,0,0.6645
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,2048,1,0,7.9407
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,4096,1,0,20.7494
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,3072,1,0,14.7342
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,6144,1,0,33.3616
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,32768,1,0,114.0935
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,1,1,0,0.1200
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,32,1,0,0.2249
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,16,1,0,0.1575
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,10240,1,0,58.9289
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,8192,1,0,46.4660
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,64,1,0,0.3576
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,12288,1,0,72.5364
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,128,1,0,0.6408
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,256,1,0,1.2625
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,512,1,0,2.8108
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,1024,1,0,6.4459
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,16384,1,0,101.0538
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,2048,1,0,16.2187
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,1536,1,0,10.9000
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,1,1,0,0.1050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,4096,1,0,42.0966
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,32,1,0,0.3578
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,3072,1,0,29.8523
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,64,1,0,0.6432
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,16,1,0,0.2223
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,128,1,0,1.2065
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,6144,1,0,67.0511
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,512,1,0,5.7397
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,1024,1,0,13.1049
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,256,1,0,2.6019
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,1536,1,0,22.1039
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,8192,1,0,93.2211
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,1,1,0,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,16,1,0,0.3621
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,32,1,0,0.6409
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,2048,1,0,32.8564
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,64,1,0,1.2037
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,128,1,0,2.5130
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,512,1,0,11.6819
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,256,1,0,5.2763
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,1,1,0,0.1199
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,1024,1,0,26.6283
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,4096,1,0,85.0636
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,3072,1,0,59.7640
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,16,1,0,0.6412
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,32,1,0,1.2165
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,64,1,0,2.5182
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,1536,1,0,44.8215
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,256,1,0,10.7425
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,2048,1,0,65.5096
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,256,1,1,0,0.1637
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,256,16,1,0,1.2175
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,512,1,0,23.7812
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,128,1,0,5.0560
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,256,32,1,0,2.5197
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,256,128,1,0,10.3090
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,256,64,1,0,5.0743
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,1024,1,0,53.3552
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,256,512,1,0,47.5413
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,256,256,1,0,21.7744
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,32,1,0,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,16,1,0,0.1997
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,1,1,0,0.1075
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,64,1,0,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,128,1,0,0.1092
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,256,1,0,0.1484
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,512,1,0,0.2269
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,1024,1,0,0.4164
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,1536,1,0,0.6460
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,2048,1,0,0.9113
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,4096,1,0,2.5743
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,1,1,0,0.1155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,16,1,0,0.1014
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,3072,1,0,1.8911
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,6144,1,0,3.6914
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,32,1,0,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,8192,1,0,5.1043
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,16384,1,0,11.5722
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,64,1,0,0.1096
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,10240,1,0,6.8289
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,32768,1,0,25.9132
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,12288,1,0,8.3988
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,128,1,0,0.1412
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,512,1,0,0.3745
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,256,1,0,0.2145
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,1536,1,0,1.1930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,2048,1,0,1.7281
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,4096,1,0,4.9786
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,1024,1,0,0.7540
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,3072,1,0,3.4766
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,12288,1,0,16.4556
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,10240,1,0,13.2908
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,8192,1,0,10.6204
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,6144,1,0,7.6423
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,1,1,0,0.1156
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,16,1,0,0.0990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,16384,1,0,22.9165
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,64,1,0,0.1426
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,128,1,0,0.2097
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,512,1,0,0.6691
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,256,1,0,0.3484
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,1024,1,0,1.3875
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,2048,1,0,3.4500
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,32768,1,0,52.6583
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,1536,1,0,2.3222
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,3072,1,0,7.1622
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,4096,1,0,9.8393
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,6144,1,0,15.0227
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,32,1,0,0.1093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,8192,1,0,20.7760
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,1,1,0,0.1113
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,10240,1,0,26.9616
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,32,1,0,0.1414
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,64,1,0,0.2105
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,16384,1,0,46.3990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,16,1,0,0.1096
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,12288,1,0,33.2328
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,128,1,0,0.3407
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,256,1,0,0.6320
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,512,1,0,1.2397
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,1024,1,0,2.8277
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,1536,1,0,5.0009
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,3072,1,0,13.5913
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,2048,1,0,7.2803
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,4096,1,0,19.1955
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,32768,1,0,105.6447
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,6144,1,0,30.3941
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,16,1,0,0.1420
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,1,1,0,0.1115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,32,1,0,0.2093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,8192,1,0,42.4219
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,10240,1,0,54.0251
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,64,1,0,0.3347
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,128,1,0,0.6053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,256,1,0,1.1621
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,12288,1,0,66.8705
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,1024,1,0,5.9586
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,1536,1,0,10.0595
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,16384,1,0,92.7460
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,2048,1,0,14.8897
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,512,1,0,2.5115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,3072,1,0,27.6254
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,1,1,0,0.1014
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,4096,1,0,38.8300
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,16,1,0,0.2098
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,32,1,0,0.3346
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,64,1,0,0.6069
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,128,1,0,1.1271
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,256,1,0,2.3255
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,512,1,0,5.3593
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,6144,1,0,61.4940
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,1024,1,0,12.1702
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,8192,1,0,84.7574
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,1,1,0,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,2048,1,0,30.1439
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,16,1,0,0.3402
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,32,1,0,0.6085
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,1536,1,0,20.4741
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,64,1,0,1.1118
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,128,1,0,2.2404
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,3072,1,0,55.5046
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,256,1,0,4.9408
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,512,1,0,11.0117
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,1,1,0,0.1113
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,4096,1,0,77.5196
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,32,1,0,1.1169
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,1024,1,0,24.7466
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,16,1,0,0.6087
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,64,1,0,2.2509
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,1536,1,0,41.2166
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,128,1,0,4.7786
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,256,1,0,10.1250
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,256,1,1,0,0.1475
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,256,32,1,0,2.2429
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,512,1,0,22.3147
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,256,16,1,0,1.1222
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,2048,1,0,60.2920
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,256,64,1,0,4.7802
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,1024,1,0,49.4933
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,256,128,1,0,9.7690
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,256,256,1,0,20.5750
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,256,512,1,0,44.3780
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1,16,1,0,0.1113
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1,64,1,0,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1,32,1,0,0.0972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1,256,1,0,0.1443
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1,1,1,0,0.0979
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1,128,1,0,0.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1,512,1,0,0.2174
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1,1024,1,0,0.4036
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1,1536,1,0,0.6300
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1,2048,1,0,0.8935
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1,4096,1,0,2.5400
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,2,1,1,0,0.1136
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,2,16,1,0,0.0982
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1,3072,1,0,1.8529
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1,8192,1,0,4.9903
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1,6144,1,0,3.6934
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,2,32,1,0,0.0967
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1,10240,1,0,6.5513
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,2,64,1,0,0.1075
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1,16384,1,0,11.2865
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1,12288,1,0,8.1104
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,2,128,1,0,0.1382
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1,32768,1,0,25.7185
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,2,256,1,0,0.2101
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,2,512,1,0,0.3626
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,2,1024,1,0,0.7346
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,2,2048,1,0,1.6892
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,2,3072,1,0,3.3946
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,2,1536,1,0,1.1622
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,2,4096,1,0,4.8541
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,2,6144,1,0,7.3877
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,2,8192,1,0,10.3020
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,2,12288,1,0,16.2232
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,2,10240,1,0,13.1259
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,4,1,1,0,0.1104
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,4,32,1,0,0.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,2,16384,1,0,22.7533
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,4,64,1,0,0.1382
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,4,16,1,0,0.0972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,4,128,1,0,0.2024
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,4,512,1,0,0.6537
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,2,32768,1,0,51.8158
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,4,1024,1,0,1.3607
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,4,256,1,0,0.3409
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,4,1536,1,0,2.2466
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,4,3072,1,0,6.8853
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,4,4096,1,0,9.4967
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,4,2048,1,0,3.2923
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,4,8192,1,0,20.4849
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,4,6144,1,0,14.8661
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,8,16,1,0,0.1093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,4,10240,1,0,26.5345
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,8,1,1,0,0.1066
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,4,16384,1,0,45.9008
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,8,64,1,0,0.1992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,8,32,1,0,0.1385
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,4,12288,1,0,32.8203
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,8,128,1,0,0.3300
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,8,256,1,0,0.6103
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,8,512,1,0,1.2089
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,8,1536,1,0,4.6773
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,8,1024,1,0,2.6893
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,8,3072,1,0,13.3413
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,8,2048,1,0,6.9557
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,8,6144,1,0,29.9725
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,4,32768,1,0,103.5067
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,8,4096,1,0,18.9200
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,16,1,1,0,0.1079
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,8,8192,1,0,41.8730
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,16,16,1,0,0.1383
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,16,32,1,0,0.2020
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,16,128,1,0,0.5883
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,8,10240,1,0,53.0592
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,16,64,1,0,0.3308
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,8,12288,1,0,66.3263
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,16,256,1,0,1.1284
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,16,512,1,0,2.3624
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,16,1024,1,0,5.5733
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,16,1536,1,0,9.8936
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,8,16384,1,0,92.0687
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,32,1,1,0,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,16,3072,1,0,27.0835
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,32,16,1,0,0.2041
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,16,4096,1,0,38.4473
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,32,32,1,0,0.3313
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,16,2048,1,0,14.5119
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,32,64,1,0,0.5888
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,32,128,1,0,1.0836
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,16,6144,1,0,60.4140
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,32,256,1,0,2.1654
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,32,512,1,0,5.0225
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,32,1024,1,0,11.9270
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,16,8192,1,0,84.3914
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,64,1,1,0,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,64,16,1,0,0.3320
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,32,2048,1,0,29.6018
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,32,1536,1,0,20.2627
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,64,32,1,0,0.5858
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,64,64,1,0,1.0814
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,64,128,1,0,2.0758
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,32,3072,1,0,54.6884
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,64,256,1,0,4.6260
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,64,512,1,0,10.6803
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,128,1,1,0,0.1091
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,64,1024,1,0,24.4147
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,128,16,1,0,0.5906
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,128,32,1,0,1.0828
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,32,4096,1,0,76.6625
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,128,64,1,0,2.0883
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,64,1536,1,0,40.4378
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,128,256,1,0,9.9017
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,128,128,1,0,4.4523
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,128,512,1,0,21.9865
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,256,1,1,0,0.1400
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,256,32,1,0,2.0626
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,256,16,1,0,1.0811
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,64,2048,1,0,59.2867
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,256,64,1,0,4.4771
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,256,128,1,0,9.5082
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,128,1024,1,0,48.4511
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,256,256,1,0,20.1085
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,256,512,1,0,43.5669
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1,16,1,0,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1,64,1,0,0.0943
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1,128,1,0,0.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1,32,1,0,0.0947
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1,256,1,0,0.1394
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1,1,1,0,0.0962
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1,512,1,0,0.2132
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1,1024,1,0,0.4008
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1,2048,1,0,0.8916
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1,1536,1,0,0.6183
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1,4096,1,0,2.5547
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,2,1,1,0,0.1017
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,2,16,1,0,0.0944
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1,3072,1,0,1.8330
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1,6144,1,0,3.6556
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,2,32,1,0,0.0930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1,8192,1,0,4.8773
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1,10240,1,0,6.4140
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,2,64,1,0,0.1075
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1,16384,1,0,11.1750
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,2,128,1,0,0.1356
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1,32768,1,0,25.2344
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1,12288,1,0,7.9550
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,2,256,1,0,0.2049
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,2,512,1,0,0.3557
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,2,1536,1,0,1.1498
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,2,1024,1,0,0.7255
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,2,2048,1,0,1.6763
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,2,3072,1,0,3.3801
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,2,6144,1,0,7.3353
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,2,10240,1,0,12.7269
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,2,8192,1,0,10.1387
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,2,12288,1,0,15.9431
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,4,1,1,0,0.1062
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,4,32,1,0,0.1074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,2,4096,1,0,4.7578
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,4,16,1,0,0.0932
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,2,16384,1,0,22.1315
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,4,128,1,0,0.1957
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,4,64,1,0,0.1348
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,2,32768,1,0,51.7075
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,4,256,1,0,0.3395
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,4,512,1,0,0.6383
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,4,1536,1,0,2.2206
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,4,1024,1,0,1.3498
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,4,2048,1,0,3.2646
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,4,4096,1,0,9.3538
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,4,6144,1,0,14.5184
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,4,10240,1,0,26.2694
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,8,1,1,0,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,4,8192,1,0,19.9657
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,4,3072,1,0,6.7775
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,8,16,1,0,0.1087
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,4,12288,1,0,32.0342
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,4,16384,1,0,45.2541
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,8,32,1,0,0.1361
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,8,64,1,0,0.1973
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,8,128,1,0,0.3235
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,8,256,1,0,0.6076
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,8,1024,1,0,2.6168
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,8,512,1,0,1.1924
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,8,1536,1,0,4.5634
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,8,4096,1,0,18.1257
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,8,3072,1,0,12.8807
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,8,6144,1,0,29.7351
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,8,2048,1,0,6.9138
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,4,32768,1,0,103.7076
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,8,8192,1,0,41.3997
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,16,16,1,0,0.1349
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,16,32,1,0,0.1969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,8,10240,1,0,53.2865
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,16,1,1,0,0.1051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,16,64,1,0,0.3223
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,16,128,1,0,0.5800
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,8,12288,1,0,65.2059
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,16,256,1,0,1.1108
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,16,512,1,0,2.3102
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,16,1536,1,0,9.5479
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,8,16384,1,0,91.0184
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,16,2048,1,0,14.0626
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,16,1024,1,0,5.5704
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,16,3072,1,0,26.5640
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,32,32,1,0,0.3230
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,32,1,1,0,0.0950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,16,4096,1,0,37.5992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,32,16,1,0,0.1996
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,32,64,1,0,0.5805
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,32,128,1,0,1.0623
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,32,256,1,0,2.1189
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,32,512,1,0,4.9690
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,16,6144,1,0,59.7474
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,16,8192,1,0,84.3389
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,32,1024,1,0,11.4257
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,32,1536,1,0,19.4864
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,64,1,1,0,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,32,2048,1,0,29.1149
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,64,16,1,0,0.3224
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,64,32,1,0,0.5828
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,64,64,1,0,1.0655
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,64,128,1,0,2.0292
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,64,256,1,0,4.5680
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,32,3072,1,0,53.9260
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,64,512,1,0,10.1400
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,32,4096,1,0,76.2532
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,64,1024,1,0,23.6748
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,128,1,1,0,0.1096
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,128,16,1,0,0.5793
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,128,32,1,0,1.0632
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,128,64,1,0,2.0289
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,128,128,1,0,4.3805
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,64,1536,1,0,40.5748
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,64,2048,1,0,58.7757
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,128,256,1,0,9.3062
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,256,1,1,0,0.1373
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,256,64,1,0,4.4039
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,256,16,1,0,1.0624
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,256,32,1,0,2.0336
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,128,512,1,0,21.1914
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,256,128,1,0,9.0013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,128,1024,1,0,48.2632
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,256,256,1,0,19.4055
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,256,512,1,0,43.1385
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1,16,1,0,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1,1,1,0,0.0958
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1,64,1,0,0.0935
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1,32,1,0,0.0913
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1,256,1,0,0.1384
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1,128,1,0,0.1034
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1,512,1,0,0.2098
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1,1024,1,0,0.3968
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1,1536,1,0,0.6118
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1,2048,1,0,0.8759
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1,4096,1,0,2.5069
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,2,1,1,0,0.0993
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,2,16,1,0,0.0916
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1,3072,1,0,1.8114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1,6144,1,0,3.6030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,2,32,1,0,0.0912
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1,8192,1,0,4.8004
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1,16384,1,0,11.0121
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,2,64,1,0,0.1037
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1,10240,1,0,6.2504
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,2,128,1,0,0.1341
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1,32768,1,0,24.9797
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1,12288,1,0,7.9089
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,2,256,1,0,0.1998
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,2,512,1,0,0.3484
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,2,2048,1,0,1.6125
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,2,1536,1,0,1.1230
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,2,1024,1,0,0.7092
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,2,3072,1,0,3.3898
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,2,4096,1,0,4.6580
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,2,8192,1,0,10.0984
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,2,6144,1,0,7.2187
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,4,1,1,0,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,2,12288,1,0,15.5407
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,2,16384,1,0,21.6872
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,2,10240,1,0,12.7386
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,4,16,1,0,0.0931
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,4,128,1,0,0.1892
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,4,32,1,0,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,4,256,1,0,0.3273
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,4,1024,1,0,1.3133
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,4,64,1,0,0.1341
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,2,32768,1,0,50.1005
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,4,1536,1,0,2.1706
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,4,512,1,0,0.6263
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,4,2048,1,0,3.1981
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,4,4096,1,0,9.1939
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,4,3072,1,0,6.6440
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,4,8192,1,0,20.0016
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,4,10240,1,0,25.4605
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,8,16,1,0,0.1050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,4,12288,1,0,31.6078
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,4,6144,1,0,14.2322
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,8,1,1,0,0.1016
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,8,32,1,0,0.1357
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,4,16384,1,0,43.5750
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,8,64,1,0,0.1897
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,8,128,1,0,0.3157
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,8,256,1,0,0.5859
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,8,512,1,0,1.1579
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,8,1024,1,0,2.5312
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,8,1536,1,0,4.4506
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,8,3072,1,0,12.6848
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,8,2048,1,0,6.7161
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,4,32768,1,0,102.1784
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,8,4096,1,0,17.9921
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,8,6144,1,0,28.7551
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,16,1,1,0,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,16,16,1,0,0.1338
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,8,8192,1,0,40.6949
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,16,32,1,0,0.1929
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,16,64,1,0,0.3145
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,16,128,1,0,0.5622
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,8,10240,1,0,51.1088
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,16,256,1,0,1.0832
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,16,512,1,0,2.2255
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,8,12288,1,0,63.7946
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,16,1024,1,0,5.4396
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,16,1536,1,0,9.3092
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,16,2048,1,0,13.7494
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,8,16384,1,0,88.8945
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,16,3072,1,0,25.7530
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,32,16,1,0,0.1931
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,32,1,1,0,0.0923
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,32,32,1,0,0.3174
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,16,4096,1,0,36.7085
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,32,64,1,0,0.5617
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,32,128,1,0,1.0311
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,32,256,1,0,2.0516
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,32,512,1,0,4.7930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,16,6144,1,0,58.5038
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,32,1024,1,0,11.1255
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,16,8192,1,0,82.1501
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,32,1536,1,0,19.0268
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,64,16,1,0,0.3130
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,64,1,1,0,0.0928
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,64,32,1,0,0.5600
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,32,3072,1,0,52.2771
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,64,64,1,0,1.0298
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,64,128,1,0,1.9811
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,32,2048,1,0,27.9684
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,64,256,1,0,4.4143
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,64,512,1,0,9.9586
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,32,4096,1,0,73.8451
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,128,1,1,0,0.1051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,64,1024,1,0,22.5125
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,128,32,1,0,1.0312
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,128,64,1,0,1.9718
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,128,16,1,0,0.5632
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,64,1536,1,0,38.5824
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,128,256,1,0,9.0995
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,128,128,1,0,4.2505
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,256,1,1,0,0.1342
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,64,2048,1,0,56.8968
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,128,512,1,0,20.0290
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,256,16,1,0,1.0356
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,256,64,1,0,4.2705
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,256,128,1,0,8.7087
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,256,32,1,0,1.9629
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,128,1024,1,0,45.8876
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1,16,1,0,0.1834
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1,1,1,0,0.2153
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,256,256,1,0,18.4430
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1,64,1,0,0.1859
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,256,512,1,0,41.0660
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1,32,1,0,0.1781
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1,128,1,0,0.2103
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1,256,1,0,0.2700
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1,512,1,0,0.4021
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1,2048,1,0,1.5016
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1,1024,1,0,0.7420
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1,1536,1,0,1.1024
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1,4096,1,0,3.6862
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1,6144,1,0,5.3378
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1,8192,1,0,7.3258
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1,12288,1,0,11.4711
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1,10240,1,0,9.2377
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1,3072,1,0,2.6793
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,2,1,1,0,0.2474
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1,16384,1,0,15.7319
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,2,16,1,0,0.1792
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,2,32,1,0,0.1852
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1,32768,1,0,35.3705
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,2,128,1,0,0.2680
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,2,64,1,0,0.2101
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,2,256,1,0,0.3959
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,2,1024,1,0,1.4017
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,2,2048,1,0,2.9075
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,2,512,1,0,0.7166
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,2,1536,1,0,2.1034
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,2,4096,1,0,7.0329
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,2,3072,1,0,5.2282
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,2,8192,1,0,14.8203
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,2,6144,1,0,10.7775
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,2,12288,1,0,22.7897
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,4,16,1,0,0.1851
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,2,10240,1,0,19.1293
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,4,1,1,0,0.1835
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,2,16384,1,0,31.6462
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,4,32,1,0,0.2084
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,4,64,1,0,0.2686
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,4,256,1,0,0.7022
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,4,128,1,0,0.3962
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,4,512,1,0,1.3541
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,4,1024,1,0,2.6866
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,4,1536,1,0,4.2241
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,2,32768,1,0,70.1360
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,4,4096,1,0,14.3103
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,4,2048,1,0,5.8805
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,4,6144,1,0,21.5892
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,4,3072,1,0,10.2999
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,4,8192,1,0,30.4672
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,8,1,1,0,0.1871
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,8,16,1,0,0.2141
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,4,10240,1,0,40.0515
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,8,32,1,0,0.2678
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,8,64,1,0,0.3954
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,8,128,1,0,0.6997
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,8,256,1,0,1.3241
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,4,16384,1,0,65.2219
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,8,512,1,0,2.5667
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,4,12288,1,0,47.2440
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,8,1024,1,0,5.4243
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,8,1536,1,0,8.6141
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,8,2048,1,0,12.0301
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,8,3072,1,0,20.3751
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,8,4096,1,0,27.3608
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,8,6144,1,0,45.0414
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,16,1,1,0,0.1904
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,4,32768,1,0,156.9101
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,8,8192,1,0,61.4441
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,16,16,1,0,0.2693
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,16,32,1,0,0.3944
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,16,64,1,0,0.7001
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,16,128,1,0,1.3154
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,8,10240,1,0,79.8320
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,16,256,1,0,2.5067
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,16,512,1,0,5.1870
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,8,12288,1,0,99.7734
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,16,1024,1,0,11.1046
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,16,1536,1,0,17.6639
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,16,2048,1,0,24.5345
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,32,1,1,0,0.1836
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,8,16384,1,0,134.5771
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,32,16,1,0,0.3999
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,16,3072,1,0,42.7957
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,32,32,1,0,0.6984
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,32,64,1,0,1.3214
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,16,4096,1,0,60.1692
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,32,128,1,0,2.4912
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,32,256,1,0,5.1198
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,32,512,1,0,10.8294
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,16,6144,1,0,90.8741
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,32,1024,1,0,22.8305
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,64,1,1,0,0.1975
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,32,1536,1,0,36.4076
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,64,16,1,0,0.7037
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,64,32,1,0,1.3151
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,32,2048,1,0,51.0138
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,16,8192,1,0,132.8790
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,64,128,1,0,5.0275
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,64,256,1,0,10.2665
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,32,3072,1,0,86.9408
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,64,512,1,0,22.0344
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,128,1,1,0,0.2363
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,32,4096,1,0,120.5118
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,64,1024,1,0,46.8952
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,128,16,1,0,1.3292
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,128,32,1,0,2.5176
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,128,64,1,0,5.0781
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,64,1536,1,0,78.0434
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,128,256,1,0,21.0469
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,256,1,1,0,0.3203
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,64,2048,1,0,110.4707
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,128,128,1,0,10.3057
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,128,512,1,0,44.6102
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,256,16,1,0,2.5288
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,256,32,1,0,5.0385
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,256,64,1,0,10.2441
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1,1,0,0.1670
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,128,1024,1,0,99.9699
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,16,1,0,0.1673
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,32,1,0,0.1415
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,256,128,1,0,21.0770
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,256,256,1,0,44.5520
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,64,1,0,0.1446
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,128,1,0,0.1923
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,256,1,0,0.2219
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1024,1,0,0.6930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1536,1,0,1.1415
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,2048,1,0,1.6653
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,512,1,0,0.3471
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,256,512,1,0,103.2249
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,4096,1,0,4.3868
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,6144,1,0,6.6394
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,8192,1,0,8.9825
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,12288,1,0,14.1351
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1,1,0,0.1800
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,10240,1,0,11.5464
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,16,1,0,0.1408
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,32,1,0,0.1443
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,32768,1,0,42.1491
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,256,1,0,0.3230
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,128,1,0,0.2121
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,512,1,0,0.6127
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1024,1,0,1.3377
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1536,1,0,2.1941
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,2048,1,0,3.2166
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,3072,1,0,6.0854
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,4096,1,0,8.5151
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,6144,1,0,13.1739
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,8192,1,0,18.1521
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,10240,1,0,22.9251
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1,1,0,0.1775
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,12288,1,0,28.1224
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,16,1,0,0.1447
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,16384,1,0,38.5365
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,32,1,0,0.1663
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,64,1,0,0.2128
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,256,1,0,0.5668
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,512,1,0,1.1902
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,32768,1,0,84.7227
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1024,1,0,2.5518
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1536,1,0,4.3249
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,2048,1,0,6.3789
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,3072,1,0,11.9766
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,4096,1,0,16.7339
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,6144,1,0,25.9823
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,8192,1,0,36.0432
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,10240,1,0,46.1519
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1,1,0,0.1476
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,12288,1,0,56.5270
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,32,1,0,0.2142
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,64,1,0,0.3048
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,128,1,0,0.5316
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,256,1,0,1.0871
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,64,1,0,0.1692
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,512,1,0,2.2675
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1024,1,0,5.0537
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1536,1,0,8.6679
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,2048,1,0,12.8535
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,32768,1,0,169.9575
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,4096,1,0,32.9484
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,6144,1,0,52.3466
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,3072,1,0,23.3702
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,8192,1,0,72.4666
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1,1,0,0.1549
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,32,1,0,0.3039
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,10240,1,0,92.0288
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,64,1,0,0.5297
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,128,1,0,1.0128
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,12288,1,0,113.4092
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,256,1,0,2.0546
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,512,1,0,4.5168
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1536,1,0,17.5451
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1024,1,0,10.1253
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,16384,1,0,157.3163
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,2048,1,0,25.9948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,16,1,0,0.1670
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1,1,0,0.1457
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,16384,1,0,78.3157
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,3072,1,0,47.2398
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,4096,1,0,66.3227
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,32,1,0,0.5251
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,64,1,0,1.0116
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,128,1,0,1.9208
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,16,1,0,0.3055
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,256,1,0,4.0533
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,6144,1,0,105.0109
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,512,1,0,8.9987
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1024,1,0,20.8984
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1,1,0,0.1524
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,8192,1,0,146.3890
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1536,1,0,35.7400
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,16,1,0,0.5321
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,32,1,0,1.0048
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,64,1,0,1.9167
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,128,1,0,3.7844
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,2048,1,0,52.8384
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,256,1,0,8.2200
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,16,1,0,0.2152
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,3072,1,0,94.2022
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,512,1,0,18.2860
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,32,1,0,1.9148
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1024,1,0,41.7574
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,1,1,0,0.1768
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,16,1,0,1.0172
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,64,1,0,3.7506
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,4096,1,0,132.7176
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1536,1,0,71.7456
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,128,1,0,7.6319
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,1,1,0,0.2378
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,16,1,0,1.9157
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,256,1,0,16.5854
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,512,1,0,37.7798
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,2048,1,0,108.1549
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,64,1,0,7.5980
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,32,1,0,3.7612
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,128,1,0,15.4620
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,32,1,0,0.1232
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,256,1,0,33.5611
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,16,1,0,0.1328
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,128,1,0,0.1417
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,64,1,0,0.1254
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1,1,0,0.1361
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,1024,1,0,83.9941
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,256,1,0,0.1846
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,512,1,0,0.2690
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,512,1,0,75.8861
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1024,1,0,0.4823
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,2048,1,0,1.0728
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1536,1,0,0.7516
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,3072,1,0,2.2035
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,4096,1,0,3.0354
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,8192,1,0,5.9534
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,10240,1,0,7.6700
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,6144,1,0,4.4105
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,12288,1,0,9.4371
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,16384,1,0,12.9682
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,64,1,0,0.1422
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1,1,0,0.1444
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,32,1,0,0.1216
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,16,1,0,0.1255
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,32768,1,0,29.0371
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,256,1,0,0.2569
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,512,1,0,0.4406
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,128,1,0,0.1795
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,2048,1,0,2.1383
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1024,1,0,0.9134
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1536,1,0,1.4834
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,6144,1,0,8.7065
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,4096,1,0,5.7465
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,8192,1,0,12.0255
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,12288,1,0,18.5322
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,3072,1,0,4.1746
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,16384,1,0,25.6566
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,10240,1,0,15.1036
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1,1,0,0.1489
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,32,1,0,0.1420
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,16,1,0,0.1255
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,64,1,0,0.1799
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,256,1,0,0.4244
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,512,1,0,0.8353
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,32768,1,0,58.1805
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,2048,1,0,4.2158
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,3072,1,0,8.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1536,1,0,2.9207
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,6144,1,0,17.0782
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,128,1,0,0.2507
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,4096,1,0,11.1637
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,8192,1,0,23.6740
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1,1,0,0.1472
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,12288,1,0,37.2473
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,16,1,0,0.1436
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,10240,1,0,30.4853
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,32,1,0,0.1778
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,16384,1,0,51.7327
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,64,1,0,0.2504
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,128,1,0,0.4000
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,256,1,0,0.7863
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,512,1,0,1.6598
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1536,1,0,5.8564
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1024,1,0,3.5123
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,3072,1,0,15.4307
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,2048,1,0,8.4487
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,4096,1,0,21.6621
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,32768,1,0,116.6062
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,6144,1,0,34.4374
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1,1,0,0.1321
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,16,1,0,0.1787
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,8192,1,0,47.7749
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,32,1,0,0.2487
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,10240,1,0,60.4846
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,64,1,0,0.4030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,128,1,0,0.7441
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,256,1,0,1.5481
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,12288,1,0,74.6257
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,512,1,0,3.2328
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1024,1,0,7.0749
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1536,1,0,11.7979
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,16384,1,0,103.5240
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,2048,1,0,17.2414
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1,1,0,0.1262
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,3072,1,0,31.1234
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,32,1,0,0.4036
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,4096,1,0,43.6487
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,256,1,0,3.0217
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,6144,1,0,69.1021
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,128,1,0,1.4674
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,512,1,0,6.4973
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1024,1,0,14.3763
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,8192,1,0,95.2974
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1536,1,0,23.8486
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1,1,0,0.1274
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,2048,1,0,34.7895
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,16,1,0,0.4006
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,32,1,0,0.7372
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,3072,1,0,62.4478
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,64,1,0,1.4746
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,128,1,0,2.8572
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,4096,1,0,87.2929
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,512,1,0,13.2159
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1024,1,0,29.3394
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,1,1,0,0.1476
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,16,1,0,0.7553
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1536,1,0,48.0597
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,32,1,0,1.4850
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,64,1,0,2.8727
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,2048,1,0,69.5039
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,128,1,0,5.7680
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,16,1,0,1.4795
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,512,1,0,27.0256
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,1,1,0,0.1931
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,1024,1,0,58.2785
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,32,1,0,2.8808
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,128,1,0,11.7758
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,64,1,0,5.7702
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,64,1,0,0.7495
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,16,1,0,0.1236
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,32,1,0,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,256,1,0,25.1531
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,128,1,0,0.1193
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,64,1,0,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,512,1,0,53.6132
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,256,1,0,0.1603
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,512,1,0,0.2327
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1024,1,0,0.3951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,2048,1,0,0.7965
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1536,1,0,0.5804
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,4096,1,0,2.2753
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,3072,1,0,1.6515
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,6144,1,0,3.3335
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,8192,1,0,4.4591
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,12288,1,0,7.1162
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,10240,1,0,5.7439
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,16384,1,0,9.7789
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,16,1,0,0.1066
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,32768,1,0,22.4452
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,32,1,0,0.1064
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,64,1,0,0.1177
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,128,1,0,0.1585
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,256,1,0,0.2245
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,512,1,0,0.3735
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1024,1,0,0.7087
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1536,1,0,1.0795
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,2048,1,0,1.5163
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,3072,1,0,3.1804
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,4096,1,0,4.3791
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,8192,1,0,9.0601
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,10240,1,0,11.2250
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,16384,1,0,19.3715
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,12288,1,0,13.8577
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1,1,0,0.1314
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,16,1,0,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,32,1,0,0.1178
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,32768,1,0,45.1915
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,128,1,0,0.2209
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,64,1,0,0.1584
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,256,1,0,0.3599
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,512,1,0,0.6649
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1536,1,0,2.2190
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1,1,0,0.1153
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,2048,1,0,3.1167
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1024,1,0,1.3425
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,3072,1,0,6.1814
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,4096,1,0,8.4043
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,8192,1,0,17.5802
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,10240,1,0,22.7251
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,12288,1,0,27.8144
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1,1,0,0.1201
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,16384,1,0,39.0350
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,16,1,0,0.1178
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,64,1,0,0.2214
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,128,1,0,0.3492
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,256,1,0,0.6444
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,6144,1,0,12.6461
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1,1,0,0.1221
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,32768,1,0,90.4799
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,512,1,0,1.2524
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1024,1,0,2.7637
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1536,1,0,4.4998
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,2048,1,0,6.3000
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,3072,1,0,11.6047
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,4096,1,0,16.1721
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,6144,1,0,25.4639
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,8192,1,0,35.5294
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,6144,1,0,6.4953
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1,1,0,0.1196
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,10240,1,0,45.2037
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,12288,1,0,55.6416
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,16,1,0,0.1575
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,32,1,0,0.2201
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,128,1,0,0.6200
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,64,1,0,0.3447
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,256,1,0,1.2103
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1536,1,0,9.0324
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,16384,1,0,77.6671
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1024,1,0,5.5699
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,2048,1,0,12.8400
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,512,1,0,2.6085
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,3072,1,0,23.3482
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,1,1,0,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,4096,1,0,32.7497
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,16,1,0,0.2178
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,64,1,0,0.6236
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,32,1,0,0.3485
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,6144,1,0,51.0539
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,128,1,0,1.1738
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,512,1,0,5.2780
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,256,1,0,2.5018
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,8192,1,0,70.7721
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,1024,1,0,11.3504
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1,1,0,0.1060
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,32,1,0,0.6237
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,16,1,0,0.3490
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,2048,1,0,26.0671
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,3072,1,0,46.5366
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,128,1,0,2.4298
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,256,1,0,5.0387
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,512,1,0,10.7271
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1024,1,0,23.1631
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,4096,1,0,65.1361
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,1,1,0,0.1218
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,16,1,0,0.6247
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1536,1,0,36.7708
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,32,1,0,1.1755
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,64,1,0,2.4394
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,2048,1,0,51.7471
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,128,1,0,4.8995
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,256,1,0,10.2533
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,1,1,0,0.1622
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,512,1,0,21.7591
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,16,1,0,1.1726
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,32,1,0,2.4388
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,64,1,0,4.8991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,1024,1,0,46.2347
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,128,1,0,10.0293
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,16,1,0,0.1122
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,256,1,0,20.7956
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,32,1,0,0.1063
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,64,1,0,0.0995
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,512,1,0,43.4681
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1,1,0,0.1074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,128,1,0,0.1135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1536,1,0,0.5479
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,512,1,0,0.2165
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,256,1,0,0.1469
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,2048,1,0,0.7547
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1024,1,0,0.3700
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,3072,1,0,1.5896
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,4096,1,0,2.1454
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,8192,1,0,4.1273
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,6144,1,0,3.0153
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,10240,1,0,5.5066
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,12288,1,0,6.8832
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,16384,1,0,9.4278
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1,1,0,0.1163
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,64,1,0,1.1685
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,16,1,0,0.1039
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,32768,1,0,21.8401
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,32,1,0,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,128,1,0,0.1421
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,256,1,0,0.2060
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,512,1,0,0.3432
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1024,1,0,0.6616
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1536,1,0,1.0130
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,3072,1,0,2.8912
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,4096,1,0,4.1022
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,6144,1,0,6.2702
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,8192,1,0,8.6590
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,12288,1,0,13.3673
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,10240,1,0,10.8719
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,16384,1,0,18.7022
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1,1,0,0.1156
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,2048,1,0,1.4062
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,32,1,0,0.1133
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,32768,1,0,43.8229
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,16,1,0,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,128,1,0,0.2019
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,64,1,0,0.1421
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,256,1,0,0.3369
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,512,1,0,0.6216
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1024,1,0,1.2247
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,2048,1,0,2.8388
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,3072,1,0,5.9457
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,4096,1,0,8.0819
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,6144,1,0,12.1502
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1536,1,0,1.9233
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,10240,1,0,21.8262
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,8192,1,0,16.8878
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1,1,0,0.1113
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,16,1,0,0.1115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,16384,1,0,37.5103
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,32,1,0,0.1441
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,12288,1,0,26.8023
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,64,1,0,0.2037
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,128,1,0,0.3257
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,256,1,0,0.5944
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,512,1,0,1.1434
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1024,1,0,2.4660
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,32768,1,0,87.8355
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1536,1,0,4.1980
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,3072,1,0,11.1294
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,6144,1,0,24.4036
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,4096,1,0,15.4319
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,64,1,0,0.1123
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,8192,1,0,34.0910
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1,1,0,0.1131
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,10240,1,0,43.4950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,16,1,0,0.1441
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,12288,1,0,53.5484
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,64,1,0,0.3239
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,128,1,0,0.5773
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,32,1,0,0.2027
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,256,1,0,1.1064
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,512,1,0,2.3084
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,16384,1,0,75.0938
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1024,1,0,5.2254
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1536,1,0,8.4983
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,2048,1,0,12.1066
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1,1,0,0.1051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,4096,1,0,31.3097
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,32,1,0,0.3226
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,16,1,0,0.2038
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,6144,1,0,48.8225
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,64,1,0,0.5804
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,3072,1,0,22.2913
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,128,1,0,1.0694
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,256,1,0,2.2026
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,8192,1,0,68.3363
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,512,1,0,4.8741
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1024,1,0,10.6398
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,1,1,0,0.0993
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,2048,1,0,24.4694
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,16,1,0,0.3222
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1536,1,0,17.1654
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,32,1,0,0.5793
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,3072,1,0,44.6162
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,128,1,0,2.1192
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,256,1,0,4.6953
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,4096,1,0,62.2595
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,512,1,0,9.9545
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,1,1,0,0.1115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,1024,1,0,21.5527
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,16,1,0,0.5807
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,32,1,0,1.0682
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,128,1,0,4.5628
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,2048,1,0,48.9286
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,256,1,0,9.5942
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,512,1,0,20.1871
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,32,1,0,2.1403
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,16,1,0,1.0700
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,1,1,0,0.1450
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,1024,1,0,42.9362
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,64,1,0,4.5588
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,128,1,0,9.3313
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1,16,1,0,0.1659
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,256,1,0,19.4678
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1,1,1,0,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1,32,1,0,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1,64,1,0,0.0995
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,512,1,0,40.3021
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1,128,1,0,0.1093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1,256,1,0,0.1381
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1,512,1,0,0.2081
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1,1536,1,0,0.5311
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1,1024,1,0,0.3596
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1,2048,1,0,0.7351
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1,3072,1,0,1.5398
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1,4096,1,0,2.1175
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1,6144,1,0,2.9676
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1,8192,1,0,3.9378
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1,12288,1,0,6.6051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1,10240,1,0,5.2414
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1,16384,1,0,9.0906
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,2,16,1,0,0.0990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1,32768,1,0,21.5145
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,2,32,1,0,0.0999
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,2,64,1,0,0.1096
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,2,128,1,0,0.1368
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,2,256,1,0,0.2025
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,2,1024,1,0,0.6376
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,64,1,0,2.1380
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,2,1536,1,0,0.9712
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,2,3072,1,0,2.8525
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,2,4096,1,0,3.9097
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,2,6144,1,0,5.9708
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,2,8192,1,0,8.3578
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,2,12288,1,0,13.1173
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,2,10240,1,0,10.5279
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,2,16384,1,0,18.3580
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,4,1,1,0,0.1112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,4,16,1,0,0.0983
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,2,32768,1,0,43.2097
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,4,64,1,0,0.1382
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,4,32,1,0,0.1112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,4,128,1,0,0.1952
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,4,256,1,0,0.3250
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,4,512,1,0,0.6003
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,4,1024,1,0,1.1828
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,4,2048,1,0,2.6324
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,4,1536,1,0,1.8614
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,4,3072,1,0,5.6994
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,4,4096,1,0,7.7291
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,4,6144,1,0,11.8830
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,4,8192,1,0,16.6103
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,4,12288,1,0,26.3918
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,8,1,1,0,0.1071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,4,16384,1,0,37.0638
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,8,16,1,0,0.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,8,32,1,0,0.1373
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,8,64,1,0,0.1958
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,8,128,1,0,0.3163
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,2,1,1,0,0.1114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,8,256,1,0,0.5708
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,4,32768,1,0,86.5046
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,8,512,1,0,1.1063
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,8,1024,1,0,2.2781
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,2,512,1,0,0.3321
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,8,2048,1,0,5.5952
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,8,1536,1,0,3.9393
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,8,3072,1,0,10.8145
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,8,4096,1,0,15.1685
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,8,6144,1,0,23.9909
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,16,1,1,0,0.1091
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,8,10240,1,0,42.5798
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,16,16,1,0,0.1380
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,16,32,1,0,0.1955
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,8,8192,1,0,33.7268
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,8,12288,1,0,52.8295
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,16,128,1,0,0.5577
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,16,256,1,0,1.0578
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,8,16384,1,0,73.9219
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,16,512,1,0,2.1149
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,16,1024,1,0,4.8977
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,16,1536,1,0,8.2674
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,16,2048,1,0,11.7996
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,16,3072,1,0,21.9143
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,32,1,1,0,0.1008
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,32,16,1,0,0.1962
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,16,4096,1,0,30.5796
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,32,32,1,0,0.3162
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,16,6144,1,0,47.8280
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,32,256,1,0,2.0200
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,32,128,1,0,1.0245
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,32,64,1,0,0.5593
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,16,8192,1,0,67.2411
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,32,512,1,0,4.5394
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,32,1536,1,0,16.7382
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,32,1024,1,0,10.3458
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,64,16,1,0,0.3157
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,64,1,1,0,0.0999
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,64,32,1,0,0.5598
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,32,2048,1,0,23.8760
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,32,3072,1,0,43.7281
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,64,64,1,0,1.0221
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,64,256,1,0,4.3563
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,64,128,1,0,2.0015
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,64,512,1,0,9.6726
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,128,1,1,0,0.1102
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,64,1024,1,0,21.0816
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,64,1536,1,0,33.7283
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,128,32,1,0,1.0252
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,128,16,1,0,0.5578
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,128,64,1,0,1.9516
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,128,128,1,0,4.2427
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,64,2048,1,0,47.6220
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,128,256,1,0,9.3007
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,256,1,1,0,0.1385
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,128,512,1,0,19.7723
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,256,16,1,0,1.0262
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,256,32,1,0,1.9654
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,128,1024,1,0,41.8070
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,16,64,1,0,0.3111
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,256,64,1,0,4.2228
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,256,128,1,0,9.0332
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,256,256,1,0,19.0200
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1,1,1,0,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1,32,1,0,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1,16,1,0,0.1063
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1,64,1,0,0.0964
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1,128,1,0,0.1078
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,256,512,1,0,39.2837
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1,512,1,0,0.2030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1,256,1,0,0.1382
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1,1024,1,0,0.3554
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1,1536,1,0,0.5233
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1,3072,1,0,1.5305
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1,2048,1,0,0.7212
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1,4096,1,0,2.0791
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1,6144,1,0,2.9558
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1,8192,1,0,3.8893
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1,10240,1,0,5.1277
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1,12288,1,0,6.4456
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,2,16,1,0,0.0972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,2,1,1,0,0.1030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,2,32,1,0,0.0969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,2,64,1,0,0.1079
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1,32768,1,0,20.9352
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,2,128,1,0,0.1345
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,2,512,1,0,0.3287
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,2,1536,1,0,0.9614
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,2,1024,1,0,0.6330
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,2,2048,1,0,1.3382
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,2,3072,1,0,2.8145
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,2,4096,1,0,3.8469
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,2,6144,1,0,5.8564
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,32,4096,1,0,61.0714
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,2,10240,1,0,10.2290
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,2,8192,1,0,8.2592
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,4,16,1,0,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,2,16384,1,0,17.7959
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,4,32,1,0,0.1092
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,2,32768,1,0,42.6876
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,4,1,1,0,0.1075
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,4,64,1,0,0.1357
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,4,128,1,0,0.1899
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,4,256,1,0,0.3127
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,4,512,1,0,0.5890
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,4,1536,1,0,1.8369
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,4,2048,1,0,2.6013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,4,1024,1,0,1.1644
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,4,3072,1,0,5.5796
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,4,4096,1,0,7.6432
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,4,6144,1,0,11.4788
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,4,8192,1,0,16.1060
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,4,10240,1,0,20.9473
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,4,12288,1,0,25.9133
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,8,1,1,0,0.1035
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,8,16,1,0,0.1112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,4,16384,1,0,36.3661
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,8,32,1,0,0.1363
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,8,64,1,0,0.1898
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,8,256,1,0,0.5667
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,8,128,1,0,0.3088
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,8,512,1,0,1.0844
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,8,1024,1,0,2.2404
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,4,32768,1,0,85.9380
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,8,1536,1,0,3.8007
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,8,2048,1,0,5.5276
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,8,4096,1,0,14.6356
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,8,8192,1,0,32.9797
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,16,1,1,0,0.1088
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,8,10240,1,0,42.0278
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,16,16,1,0,0.1358
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,8,12288,1,0,52.3185
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,16,32,1,0,0.1884
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,16,64,1,0,0.3123
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,16,128,1,0,0.5466
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,16,256,1,0,1.0372
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,16,512,1,0,2.0772
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,16,1024,1,0,4.7874
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,16,3072,1,0,21.3245
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,16,2048,1,0,11.2293
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,16,4096,1,0,30.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,16,6144,1,0,47.5236
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,32,1,1,0,0.0979
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,32,16,1,0,0.1873
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,32,32,1,0,0.3121
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,32,64,1,0,0.5467
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,32,128,1,0,1.0085
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,16,8192,1,0,66.4069
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,32,256,1,0,1.9813
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,32,512,1,0,4.4526
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,32,1024,1,0,9.8344
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,32,2048,1,0,23.2491
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,32,3072,1,0,43.1562
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,64,1,1,0,0.0955
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,8,3072,1,0,10.4264
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,32,4096,1,0,60.6658
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,64,16,1,0,0.3069
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,64,32,1,0,0.5491
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,64,64,1,0,1.0014
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,64,512,1,0,9.1102
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,64,256,1,0,4.2806
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,64,1024,1,0,20.3603
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,128,1,1,0,0.1093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,8,16384,1,0,73.6170
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,64,1536,1,0,33.3147
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,128,16,1,0,0.5468
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,128,32,1,0,1.0072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,16,1536,1,0,7.8554
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,64,2048,1,0,47.0278
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,128,64,1,0,1.9266
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,128,128,1,0,4.1488
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,128,256,1,0,8.7424
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,128,512,1,0,19.1210
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,256,32,1,0,1.9147
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,256,1,1,0,0.1359
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,256,16,1,0,1.0103
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,256,64,1,0,4.1343
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,128,1024,1,0,41.2045
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,256,128,1,0,8.5247
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,256,256,1,0,18.2610
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1,32,1,0,0.0929
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1,16,1,0,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1,64,1,0,0.0952
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,256,512,1,0,38.6515
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1,1,1,0,0.0973
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1,256,1,0,0.1357
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1,128,1,0,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1,512,1,0,0.1986
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1,1024,1,0,0.3454
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1,1536,1,0,0.5126
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1,2048,1,0,0.7074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1,3072,1,0,1.5482
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1,4096,1,0,2.0758
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1,10240,1,0,5.0367
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1,6144,1,0,2.8740
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1,16384,1,0,8.8683
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1,12288,1,0,6.3184
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,2,1,1,0,0.0995
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,2,16,1,0,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,2,32,1,0,0.0968
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1,32768,1,0,20.6688
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,2,64,1,0,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,64,128,1,0,1.9128
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,2,128,1,0,0.1318
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,2,512,1,0,0.3241
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,2,256,1,0,0.1904
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,2,1024,1,0,0.6099
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,2,1536,1,0,0.9400
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,2,2048,1,0,1.3077
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,2,3072,1,0,2.7529
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,2,6144,1,0,5.7653
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,2,8192,1,0,8.0935
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,2,4096,1,0,3.8189
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,2,10240,1,0,10.0854
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,2,12288,1,0,12.5758
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,4,1,1,0,0.1075
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,2,16384,1,0,17.5429
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,4,32,1,0,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,4,16,1,0,0.0955
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,4,64,1,0,0.1319
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,4,128,1,0,0.1829
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,4,512,1,0,0.5716
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,2,32768,1,0,41.5953
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,4,1024,1,0,1.1274
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,4,1536,1,0,1.7860
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,4,3072,1,0,5.4304
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,4,4096,1,0,7.4697
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,4,2048,1,0,2.5308
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,4,8192,1,0,15.8133
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,4,6144,1,0,11.2778
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,4,10240,1,0,20.3945
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,4,12288,1,0,25.1704
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,8,16,1,0,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,4,16384,1,0,35.3002
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,8,1,1,0,0.1035
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,8,64,1,0,0.1821
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,8,32,1,0,0.1313
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,8,128,1,0,0.3040
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,8,256,1,0,0.5499
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,8,512,1,0,1.0515
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,4,32768,1,0,83.8184
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,8,1024,1,0,2.1794
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,8,2048,1,0,5.3676
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1,8192,1,0,3.8150
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,8,3072,1,0,10.1899
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,8,1536,1,0,3.6848
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,8,4096,1,0,14.3389
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,8,6144,1,0,22.6508
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,16,1,1,0,0.1046
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,8,8192,1,0,31.9831
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,16,16,1,0,0.1322
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,16,32,1,0,0.1849
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,8,10240,1,0,40.5756
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,8,12288,1,0,50.4352
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,16,64,1,0,0.2984
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,16,128,1,0,0.5330
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,16,256,1,0,1.0090
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,16,512,1,0,2.0050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,16,1024,1,0,4.6432
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,8,16384,1,0,71.3252
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,16,2048,1,0,10.9515
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,16,3072,1,0,20.7053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,16,1536,1,0,7.6348
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,32,1,1,0,0.0960
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,16,4096,1,0,28.8834
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,32,16,1,0,0.1850
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,32,32,1,0,0.3022
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,16,6144,1,0,45.7215
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,32,512,1,0,4.3177
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,32,256,1,0,1.9084
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,32,128,1,0,0.9772
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,16,8192,1,0,64.3247
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,32,1024,1,0,9.5652
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,32,1536,1,0,15.4233
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,64,16,1,0,0.3023
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,32,2048,1,0,22.1588
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,64,1,1,0,0.0963
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,64,32,1,0,0.5279
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,32,3072,1,0,41.3717
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,64,128,1,0,1.8638
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,32,4096,1,0,58.4912
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,64,512,1,0,8.8405
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,64,256,1,0,4.1420
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,64,1024,1,0,19.2707
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,128,1,1,0,0.1055
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,128,16,1,0,0.5329
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,128,32,1,0,0.9695
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,64,1536,1,0,31.5354
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,64,2048,1,0,45.2291
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,128,64,1,0,1.8519
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,128,256,1,0,8.4686
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,128,128,1,0,4.0052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,256,1,1,0,0.1317
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,256,16,1,0,0.9751
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,128,512,1,0,17.9154
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,256,32,1,0,1.8375
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,128,1024,1,0,39.1227
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,256,64,1,0,3.9983
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,256,128,1,0,8.2580
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,256,256,1,0,17.2685
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,16,1,0,0.1889
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,32,1,0,0.1900
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,64,1,0,0.1932
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,256,512,1,0,36.3999
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,1,1,0,0.1806
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,128,1,0,0.2100
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,256,1,0,0.2469
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,512,1,0,0.3371
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,1024,1,0,0.6227
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,2048,1,0,1.3025
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,1536,1,0,0.9005
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,4096,1,0,2.9209
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,6144,1,0,4.5134
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,8192,1,0,6.2040
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,3072,1,0,2.0574
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,12288,1,0,9.6844
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,16384,1,0,13.4797
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,10240,1,0,7.8964
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,1,1,0,0.1858
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,16,1,0,0.1893
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,32,1,0,0.1933
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,64,1,0,0.2097
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,32768,1,0,31.2929
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,512,1,0,0.5697
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,128,1,0,0.2432
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,1024,1,0,1.0958
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,64,64,1,0,0.9766
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,2048,1,0,2.4510
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,1536,1,0,1.6826
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,4096,1,0,5.5874
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,3072,1,0,3.9887
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,6144,1,0,8.7994
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,8192,1,0,12.1917
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,10240,1,0,15.8092
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,1,1,0,0.1905
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,12288,1,0,19.4185
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,16,1,0,0.1948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,16384,1,0,27.0902
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,32,1,0,0.2108
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,64,1,0,0.2437
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,128,1,0,0.3164
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,512,1,0,0.9985
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,1024,1,0,2.0569
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,1536,1,0,3.2536
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,32768,1,0,62.8054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,256,1,0,0.5431
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,2048,1,0,4.6676
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,4096,1,0,10.9716
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,3072,1,0,7.7829
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,6144,1,0,17.6056
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,1,1,0,0.1875
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,8192,1,0,24.6084
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,10240,1,0,31.6886
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,16,1,0,0.2103
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,32,1,0,0.2436
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,12288,1,0,38.9599
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,64,1,0,0.3170
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,16384,1,0,54.9024
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,128,1,0,0.5333
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,256,1,0,0.9450
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,512,1,0,1.8659
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,1024,1,0,3.8665
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,1536,1,0,6.3096
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,3072,1,0,15.5433
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,2048,1,0,9.1386
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,4096,1,0,22.0215
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,256,1,0,0.3246
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,32768,1,0,127.3832
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,6144,1,0,35.5279
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,1,1,0,0.1898
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,8192,1,0,49.5697
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,16,1,0,0.2464
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,32,1,0,0.3170
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,64,1,0,0.5319
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,10240,1,0,63.8778
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,256,1,0,1.7603
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,128,1,0,0.9248
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,12288,1,0,79.3478
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,512,1,0,3.4701
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,1024,1,0,7.5530
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,1536,1,0,12.6461
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,2048,1,0,18.4693
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,16384,1,0,110.8023
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,1,1,0,0.1958
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,3072,1,0,31.3160
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,16,1,0,0.3189
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,32,1,0,0.5333
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,4096,1,0,44.2798
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,64,1,0,0.9276
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,128,1,0,1.7168
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,512,1,0,6.7918
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,256,1,0,3.2818
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,6144,1,0,71.6973
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,1536,1,0,25.2149
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,8192,1,0,99.5977
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,16,1,0,0.5394
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,1,1,0,0.2080
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,1024,1,0,15.3116
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,2048,1,0,37.7607
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,32,1,0,0.9272
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,64,1,0,1.7201
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,256,1,0,6.3825
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,3072,1,0,63.6158
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,128,1,0,3.1964
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,512,1,0,13.7594
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,1024,1,0,31.1174
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,4096,1,0,90.3382
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,16,1,0,0.9363
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,1,1,0,0.2347
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,32,1,0,1.7182
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,64,1,0,3.2169
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,1536,1,0,50.9622
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,128,1,0,6.2140
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,16,1,0,1.7453
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,256,1,0,13.0131
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,1,1,0,0.2972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,2048,1,0,75.5445
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,32,1,0,3.2128
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,512,1,0,28.1453
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,64,1,0,6.2171
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,128,1,0,12.6480
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,1024,1,0,63.0364
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,16,1,0,0.1463
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,256,1,0,26.4380
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,64,1,0,0.1542
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,32,1,0,0.7063
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,256,1,0,0.2156
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,1,1,0,0.1383
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,128,1,0,0.1693
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,1024,1,0,0.7592
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,512,1,0,56.4497
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,2048,1,0,2.1358
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,3072,1,0,3.8254
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,4096,1,0,5.5639
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,1536,1,0,1.3415
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,10240,1,0,16.0001
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,6144,1,0,8.9975
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,12288,1,0,19.4326
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,512,1,0,0.3406
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,16384,1,0,26.7024
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,8192,1,0,12.3133
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,1,1,0,0.1429
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,32,1,0,0.1535
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,16,1,0,0.1462
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,64,1,0,0.1703
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,128,1,0,0.2014
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,1024,1,0,1.4424
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,32768,1,0,59.0073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,256,1,0,0.2958
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,512,1,0,0.5840
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,3072,1,0,7.6061
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,1536,1,0,2.6375
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,4096,1,0,10.9201
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,8192,1,0,24.8135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,2048,1,0,4.2381
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,12288,1,0,39.2888
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,10240,1,0,31.8497
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,1,1,0,0.1439
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,6144,1,0,17.9219
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,16,1,0,0.1544
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,32,1,0,0.1720
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,64,1,0,0.2018
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,128,1,0,0.2717
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,256,1,0,0.4913
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,16384,1,0,54.0954
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,1024,1,0,2.7871
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,1536,1,0,5.2377
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,2048,1,0,8.3501
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,3072,1,0,15.2124
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,32768,1,0,119.4299
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,4096,1,0,22.0472
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,512,1,0,1.0830
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,6144,1,0,36.2654
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,1,1,0,0.1449
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,8192,1,0,50.4353
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,16,1,0,0.1689
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,32,1,0,0.2016
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,10240,1,0,64.9810
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,64,1,0,0.2723
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,128,1,0,0.4421
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,256,1,0,0.8868
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,12288,1,0,79.3178
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,512,1,0,2.1061
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,1024,1,0,5.4589
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,1536,1,0,10.2839
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,2048,1,0,16.7773
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,16384,1,0,109.7640
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,3072,1,0,30.7328
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,4096,1,0,44.3474
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,1,1,0,0.1473
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,16,1,0,0.2033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,6144,1,0,73.1191
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,32,1,0,0.2715
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,64,1,0,0.4426
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,128,1,0,0.7944
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,8192,1,0,102.0457
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,256,1,0,1.6738
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,512,1,0,4.0461
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,32768,1,0,240.2981
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,10240,1,0,130.1710
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,1024,1,0,10.8110
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,1536,1,0,21.0379
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,2048,1,0,33.7377
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,12288,1,0,160.1125
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,1,1,0,0.1514
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,16,1,0,0.2726
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,3072,1,0,61.3850
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,32,1,0,0.4429
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,64,1,0,0.7915
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,128,1,0,1.4857
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,256,1,0,3.1484
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,16384,1,0,220.2951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,4096,1,0,89.9944
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,512,1,0,8.0821
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,1024,1,0,21.8886
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,1,1,0,0.1608
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,1536,1,0,42.4767
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,16,1,0,0.4482
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,6144,1,0,146.4430
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,32,1,0,0.7898
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,128,1,0,2.7739
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,2048,1,0,67.9706
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,64,1,0,1.4868
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,256,1,0,6.1532
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,8192,1,0,203.3439
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,512,1,0,16.4434
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,1,1,0,0.1835
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,3072,1,0,123.1016
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,16,1,0,0.7964
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,1024,1,0,43.9976
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,32,1,0,1.4742
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,64,1,0,2.7772
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,128,1,0,5.4092
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,256,1,0,12.5093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,1,1,0,0.2307
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,4096,1,0,180.2757
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,1536,1,0,84.6775
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,16,1,0,1.4929
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,512,1,0,33.1635
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,32,1,0,2.7743
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,64,1,0,5.4072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,128,1,0,11.0135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,2048,1,0,136.2628
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,16,1,0,0.1163
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,32,1,0,0.1195
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,1,1,0,0.1074
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,1024,1,0,87.7919
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,64,1,0,0.1238
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,128,1,0,0.1381
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,256,1,0,0.1712
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,256,1,0,25.0834
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,512,1,0,0.2453
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,1024,1,0,0.4750
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,512,1,0,66.2870
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,2048,1,0,1.2308
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,4096,1,0,3.0703
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,8192,1,0,6.8600
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,3072,1,0,2.1302
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,1536,1,0,0.7940
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,10240,1,0,8.7874
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,12288,1,0,10.8521
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,6144,1,0,4.9277
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,16,1,0,0.1177
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,32,1,0,0.1254
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,1,1,0,0.1115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,16384,1,0,15.1783
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,32768,1,0,34.6453
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,128,1,0,0.1626
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,256,1,0,0.2228
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,512,1,0,0.3903
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,1024,1,0,0.8691
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,2048,1,0,2.3618
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,3072,1,0,4.1973
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,64,1,0,0.1389
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,6144,1,0,9.7136
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,4096,1,0,6.0181
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,8192,1,0,13.7655
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,12288,1,0,21.8289
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,10240,1,0,17.9036
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,1536,1,0,1.5028
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,16,1,0,0.1257
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,1,1,0,0.1155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,32,1,0,0.1381
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,16384,1,0,30.5961
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,64,1,0,0.1653
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,256,1,0,0.3422
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,512,1,0,0.6946
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,128,1,0,0.2059
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,2048,1,0,4.6037
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,32768,1,0,69.8525
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,1536,1,0,2.9833
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,4096,1,0,12.0380
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,6144,1,0,19.7896
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,3072,1,0,8.3084
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,1024,1,0,1.6444
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,8192,1,0,27.8146
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,1,1,0,0.1173
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,12288,1,0,44.0648
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,32,1,0,0.1643
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,64,1,0,0.2073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,10240,1,0,35.8611
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,16,1,0,0.1407
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,128,1,0,0.3154
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,16384,1,0,62.0714
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,512,1,0,1.2905
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,1024,1,0,3.1621
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,1536,1,0,5.8097
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,256,1,0,0.5944
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,2048,1,0,9.1861
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,4096,1,0,24.5515
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,3072,1,0,16.8748
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,6144,1,0,40.3658
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,32768,1,0,141.9849
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,1,1,0,0.1176
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,8192,1,0,56.6695
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,16,1,0,0.1635
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,32,1,0,0.2064
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,64,1,0,0.3157
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,128,1,0,0.5481
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,512,1,0,2.4682
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,10240,1,0,72.3985
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,256,1,0,1.0963
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,12288,1,0,89.2183
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,1024,1,0,6.2969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,2048,1,0,18.7125
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,1536,1,0,11.8839
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,16384,1,0,124.8897
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,1,1,0,0.1201
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,64,1,0,0.5461
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,32,1,0,0.3162
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,128,1,0,1.0043
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,3072,1,0,33.9406
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,16,1,0,0.2075
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,256,1,0,2.0225
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,4096,1,0,49.7684
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,6144,1,0,80.6877
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,512,1,0,4.8870
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,1024,1,0,12.7740
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,8192,1,0,113.3635
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,1536,1,0,24.0319
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,16,1,0,0.3167
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,2048,1,0,37.8463
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,1,1,0,0.1292
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,32,1,0,0.5471
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,64,1,0,0.9977
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,128,1,0,1.8460
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,512,1,0,10.1496
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,3072,1,0,67.9048
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,256,1,0,3.9237
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,1024,1,0,25.6934
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,1,1,0,0.1444
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,16,1,0,0.5501
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,4096,1,0,98.7632
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,64,1,0,1.8384
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,32,1,0,1.0053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,128,1,0,3.5520
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,1536,1,0,47.9212
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,1,1,0,0.1773
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,256,1,0,8.0797
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,512,1,0,20.2868
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,2048,1,0,75.5969
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,32,1,0,1.8367
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,16,1,0,1.0121
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,64,1,0,3.5682
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,16,1,0,0.1077
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,256,1,0,16.3577
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,1024,1,0,51.2339
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,128,1,0,7.2869
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,128,1,0,0.1196
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,64,1,0,0.1132
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,32,1,0,0.1093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,512,1,0,40.3688
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,1,1,0,0.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,512,1,0,0.1940
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,1024,1,0,0.3430
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,1536,1,0,0.5281
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,2048,1,0,0.7816
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,3072,1,0,1.2980
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,6144,1,0,3.0108
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,256,1,0,0.1428
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,12288,1,0,6.6670
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,4096,1,0,1.8611
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,16384,1,0,9.4006
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,1,1,0,0.1051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,16,1,0,0.1092
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,8192,1,0,4.1462
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,10240,1,0,5.3799
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,64,1,0,0.1196
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,512,1,0,0.2899
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,256,1,0,0.1862
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,32768,1,0,22.8529
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,128,1,0,0.1397
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,2048,1,0,1.4430
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,1024,1,0,0.5973
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,32,1,0,0.1124
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,1536,1,0,0.9665
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,4096,1,0,3.6025
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,8192,1,0,8.1723
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,3072,1,0,2.4979
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,6144,1,0,5.7792
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,10240,1,0,10.7670
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,16,1,0,0.1132
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,1,1,0,0.1073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,12288,1,0,13.4143
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,128,1,0,0.1766
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,32,1,0,0.1212
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,32768,1,0,46.3229
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,16384,1,0,19.0558
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,256,1,0,0.2665
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,64,1,0,0.1403
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,1024,1,0,1.0887
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,1536,1,0,1.8560
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,3072,1,0,4.8934
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,512,1,0,0.5068
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,6144,1,0,11.7469
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,8192,1,0,16.5640
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,2048,1,0,2.7403
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,10240,1,0,21.9094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,8,1,1,0,0.1091
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,8,16,1,0,0.1198
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,16384,1,0,38.6572
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,8,32,1,0,0.1396
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,8,64,1,0,0.1770
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,12288,1,0,27.0881
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,4096,1,0,7.1035
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,8,128,1,0,0.2517
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,8,1024,1,0,2.0212
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,8,256,1,0,0.4552
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,8,1536,1,0,3.5497
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,8,3072,1,0,9.9240
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,32768,1,0,92.9467
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,8,512,1,0,0.9043
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,8,4096,1,0,14.4691
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,8,2048,1,0,5.4725
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,16,1,1,0,0.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,8,8192,1,0,33.5397
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,16,16,1,0,0.1395
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,8,6144,1,0,23.7734
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,8,10240,1,0,43.8480
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,16,32,1,0,0.1757
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,16,256,1,0,0.8120
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,16,512,1,0,1.6653
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,16,1024,1,0,3.9766
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,16,128,1,0,0.4297
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,8,12288,1,0,54.0921
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,16,1536,1,0,7.3070
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,16,2048,1,0,11.1962
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,16,64,1,0,0.2516
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,8,16384,1,0,77.2130
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,16,3072,1,0,19.9536
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,32,1,1,0,0.1096
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,16,4096,1,0,29.4078
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,32,16,1,0,0.1744
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,32,64,1,0,0.4308
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,32,128,1,0,0.7654
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,32,512,1,0,3.2455
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,16,6144,1,0,47.8042
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,32,32,1,0,0.2524
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,32,1024,1,0,8.2659
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,32,1536,1,0,14.6809
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,32,256,1,0,1.4760
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,16,8192,1,0,67.9397
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,64,16,1,0,0.2525
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,32,2048,1,0,22.9376
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,64,1,1,0,0.1135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,64,64,1,0,0.7657
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,64,128,1,0,1.3673
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,32,3072,1,0,40.4370
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,64,32,1,0,0.4315
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,64,256,1,0,2.8361
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,64,512,1,0,6.8392
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,32,4096,1,0,58.7004
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,128,16,1,0,0.4307
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,128,32,1,0,0.7634
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,128,1,1,0,0.1235
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,64,1536,1,0,30.0287
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,128,64,1,0,1.3759
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,128,128,1,0,2.6221
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,64,2048,1,0,45.3452
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,256,1,1,0,0.1465
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,64,1024,1,0,16.6702
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,128,512,1,0,13.9017
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,128,256,1,0,5.8275
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,256,32,1,0,1.3673
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,256,16,1,0,0.7682
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,1,1,0,0.0930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,128,1024,1,0,33.0846
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,256,256,1,0,11.8022
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,256,64,1,0,2.6216
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,256,128,1,0,5.5016
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,32,1,0,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,128,1,0,0.1155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,16,1,0,0.1051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,256,1,0,0.1343
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,1024,1,0,0.3117
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,256,512,1,0,27.7137
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,512,1,0,0.1811
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,64,1,0,0.1075
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,1536,1,0,0.4809
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,4096,1,0,1.6743
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,8192,1,0,3.7275
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,3072,1,0,1.1653
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,2048,1,0,0.7103
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,16384,1,0,8.5293
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,10240,1,0,4.8217
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,6144,1,0,2.6819
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,12288,1,0,5.9484
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,32768,1,0,21.1581
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,1,1,0,0.0994
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,128,1,0,0.1311
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,32,1,0,0.1083
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,16,1,0,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,1024,1,0,0.5413
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,1536,1,0,0.8736
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,2048,1,0,1.2957
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,512,1,0,0.2693
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,64,1,0,0.1134
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,6144,1,0,5.2362
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,8192,1,0,7.3684
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,3072,1,0,2.2462
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,256,1,0,0.1705
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,4096,1,0,3.1480
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,12288,1,0,11.9616
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,32,1,0,0.1135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,10240,1,0,9.7227
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,1,1,0,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,32768,1,0,42.7181
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,16,1,0,0.1090
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,64,1,0,0.1321
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,16384,1,0,17.3096
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,256,1,0,0.2457
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,512,1,0,0.4617
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,1024,1,0,0.9854
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,2048,1,0,2.4643
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,128,1,0,0.1645
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,4096,1,0,6.2475
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,3072,1,0,4.3131
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,1536,1,0,1.6481
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,6144,1,0,10.4884
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,8,1,1,0,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,10240,1,0,19.5351
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,16384,1,0,34.8269
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,8,32,1,0,0.1298
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,8192,1,0,15.0200
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,8,16,1,0,0.1157
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,12288,1,0,24.2880
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,8,64,1,0,0.1626
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,8,256,1,0,0.4224
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,8,128,1,0,0.2349
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,8,512,1,0,0.8250
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,8,1024,1,0,1.8225
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,8,2048,1,0,4.8748
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,32768,1,0,85.2009
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,8,3072,1,0,8.7431
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,8,1536,1,0,3.1722
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,8,4096,1,0,12.8888
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,8,6144,1,0,21.4440
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,16,16,1,0,0.1316
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,16,1,1,0,0.1049
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,8,8192,1,0,30.5256
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,16,32,1,0,0.1655
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,8,12288,1,0,49.1847
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,16,64,1,0,0.2338
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,16,128,1,0,0.3971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,8,10240,1,0,39.6291
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,16,256,1,0,0.7378
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,16,1024,1,0,3.5926
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,16,2048,1,0,9.9635
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,16,1536,1,0,6.5177
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,8,16384,1,0,69.2421
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,16,512,1,0,1.5012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,16,3072,1,0,18.0688
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,16,4096,1,0,26.3420
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,32,1,1,0,0.1046
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,32,16,1,0,0.1650
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,32,64,1,0,0.3979
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,32,256,1,0,1.3271
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,32,512,1,0,2.9776
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,32,128,1,0,0.6960
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,16,6144,1,0,42.8754
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,32,32,1,0,0.2341
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,16,8192,1,0,61.5506
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,32,1536,1,0,13.0804
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,32,1024,1,0,7.3952
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,64,16,1,0,0.2351
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,32,2048,1,0,20.3393
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,64,1,1,0,0.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,64,64,1,0,0.6971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,64,128,1,0,1.2424
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,64,256,1,0,2.5736
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,32,4096,1,0,52.9913
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,64,32,1,0,0.4004
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,64,1024,1,0,15.0444
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,32,3072,1,0,36.6850
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,64,1536,1,0,26.5283
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,64,512,1,0,6.1602
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,128,1,1,0,0.1156
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,128,16,1,0,0.3994
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,128,64,1,0,1.2496
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,128,128,1,0,2.4082
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,64,2048,1,0,40.9623
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,128,32,1,0,0.6947
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,128,256,1,0,5.2857
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,256,16,1,0,0.7037
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,256,1,1,0,0.1373
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,128,1024,1,0,30.1158
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,256,32,1,0,1.2552
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,128,512,1,0,12.5060
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,256,64,1,0,2.3820
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,256,128,1,0,4.9481
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,16,1,0,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,32,1,0,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,256,512,1,0,25.3331
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,64,1,0,0.1060
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,1,1,0,0.0890
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,512,1,0,0.1759
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,256,256,1,0,10.7741
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,128,1,0,0.1114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,3072,1,0,1.1473
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,2048,1,0,0.6954
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,256,1,0,0.1322
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,1536,1,0,0.4657
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,8192,1,0,3.6701
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,10240,1,0,4.7683
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,12288,1,0,5.9098
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,16384,1,0,8.3555
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,6144,1,0,2.6515
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,4096,1,0,1.6344
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,32768,1,0,20.7990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,1024,1,0,0.3036
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,32,1,0,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,16,1,0,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,128,1,0,0.1278
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,256,1,0,0.1687
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,512,1,0,0.2639
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,1,1,0,0.0997
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,64,1,0,0.1132
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,2048,1,0,1.2761
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,3072,1,0,2.2120
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,1536,1,0,0.8517
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,8192,1,0,7.2253
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,6144,1,0,5.0639
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,4096,1,0,3.1109
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,16384,1,0,17.0775
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,12288,1,0,11.8772
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,10240,1,0,9.4361
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,32,1,0,0.1112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,1024,1,0,0.5313
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,1,1,0,0.0992
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,16,1,0,0.1112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,256,1,0,0.2440
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,32768,1,0,42.2219
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,512,1,0,0.4462
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,128,1,0,0.1613
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,64,1,0,0.1285
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,1024,1,0,0.9659
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,3072,1,0,4.1933
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,4096,1,0,6.1955
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,1536,1,0,1.6130
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,2048,1,0,2.4044
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,10240,1,0,19.4221
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,12288,1,0,24.1988
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,6144,1,0,10.3781
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,8,16,1,0,0.1120
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,8192,1,0,14.7519
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,8,1,1,0,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,8,32,1,0,0.1283
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,8,256,1,0,0.4084
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,8,64,1,0,0.1607
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,8,512,1,0,0.7971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,16384,1,0,34.2652
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,8,1024,1,0,1.7702
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,32768,1,0,84.1899
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,8,2048,1,0,4.7293
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,8,4096,1,0,12.6696
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,8,1536,1,0,3.0952
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,8,6144,1,0,20.7636
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,8,8192,1,0,30.1634
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,8,3072,1,0,8.6898
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,8,128,1,0,0.2272
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,16,16,1,0,0.1280
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,8,10240,1,0,38.9556
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,16,1,1,0,0.1034
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,16,32,1,0,0.1611
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,16,128,1,0,0.3862
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,16,256,1,0,0.7122
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,8,12288,1,0,47.8631
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,16,64,1,0,0.2282
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,8,16384,1,0,69.7224
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,16,1024,1,0,3.4522
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,16,512,1,0,1.4484
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,16,2048,1,0,9.9029
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,16,3072,1,0,17.5176
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,32,16,1,0,0.1590
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,16,4096,1,0,25.8507
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,16,1536,1,0,6.3279
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,32,32,1,0,0.2272
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,32,1,1,0,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,32,128,1,0,0.6739
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,32,256,1,0,1.2760
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,32,64,1,0,0.3834
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,16,8192,1,0,60.4022
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,16,6144,1,0,42.8546
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,32,512,1,0,2.8556
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,32,1024,1,0,7.1896
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,32,1536,1,0,12.9528
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,64,16,1,0,0.2278
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,64,64,1,0,0.6741
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,32,2048,1,0,20.0240
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,32,3072,1,0,35.4839
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,64,1,1,0,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,64,32,1,0,0.3851
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,64,512,1,0,5.9571
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,64,1024,1,0,14.6897
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,64,256,1,0,2.4686
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,128,1,1,0,0.1135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,32,4096,1,0,51.6826
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,64,1536,1,0,25.9835
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,64,128,1,0,1.1879
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,128,16,1,0,0.3878
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,128,64,1,0,1.1994
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,128,32,1,0,0.6745
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,64,2048,1,0,39.9596
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,128,256,1,0,5.0943
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,128,128,1,0,2.2764
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,256,16,1,0,0.6762
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,128,512,1,0,12.1277
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,128,1024,1,0,29.1258
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,256,32,1,0,1.1976
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,256,1,1,0,0.1300
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,256,64,1,0,2.2658
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,256,128,1,0,4.7331
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,1,1,0,0.0896
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,256,256,1,0,10.3529
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,128,1,0,0.1115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,256,512,1,0,24.5055
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,16,1,0,0.0994
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,64,1,0,0.1036
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,256,1,0,0.1316
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,1024,1,0,0.3027
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,32,1,0,0.1029
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,2048,1,0,0.6911
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,4096,1,0,1.6412
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,3072,1,0,1.1457
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,8192,1,0,3.6231
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,10240,1,0,4.6864
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,12288,1,0,5.8723
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,1536,1,0,0.4656
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,1,1,0,0.0990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,16384,1,0,8.3734
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,16,1,0,0.1030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,64,1,0,0.1114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,32,1,0,0.1040
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,32768,1,0,20.9249
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,512,1,0,0.1760
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,512,1,0,0.2572
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,256,1,0,0.1634
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,1024,1,0,0.5272
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,6144,1,0,2.6065
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,1536,1,0,0.8389
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,2048,1,0,1.2653
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,4096,1,0,3.0765
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,6144,1,0,5.0361
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,8192,1,0,7.2112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,12288,1,0,11.8182
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,128,1,0,0.1278
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,10240,1,0,9.3494
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,1,1,0,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,16,1,0,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,32,1,0,0.1112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,16384,1,0,17.0673
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,64,1,0,0.1278
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,256,1,0,0.2363
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,128,1,0,0.1584
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,3072,1,0,2.1913
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,1536,1,0,1.6068
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,1024,1,0,0.9467
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,512,1,0,0.4405
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,2048,1,0,2.3874
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,3072,1,0,4.2565
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,8192,1,0,14.5908
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,32768,1,0,41.9024
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,4096,1,0,6.0556
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,12288,1,0,23.8006
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,10240,1,0,19.3496
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,6144,1,0,10.2370
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,8,16,1,0,0.1115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,8,32,1,0,0.1276
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,8,1,1,0,0.0994
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,8,128,1,0,0.2262
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,16384,1,0,34.4100
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,8,64,1,0,0.1585
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,8,512,1,0,0.7943
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,8,256,1,0,0.4047
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,8,1536,1,0,3.0695
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,8,1024,1,0,1.7216
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,32768,1,0,84.1066
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,8,3072,1,0,8.6197
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,8,6144,1,0,20.5953
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,8,2048,1,0,4.6497
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,8,4096,1,0,12.5407
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,8,8192,1,0,29.7336
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,16,1,1,0,0.1008
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,16,64,1,0,0.2250
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,16,256,1,0,0.7020
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,16,32,1,0,0.1585
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,8,12288,1,0,47.7391
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,16,16,1,0,0.1261
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,8,10240,1,0,38.8953
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,16,128,1,0,0.3790
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,16,1024,1,0,3.4302
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,16,512,1,0,1.4183
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,8,16384,1,0,69.0619
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,16,2048,1,0,9.5931
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,16,3072,1,0,17.3659
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,16,1536,1,0,6.2268
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,32,16,1,0,0.1565
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,32,32,1,0,0.2238
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,32,1,1,0,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,16,6144,1,0,42.4966
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,32,64,1,0,0.3809
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,32,128,1,0,0.6579
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,32,256,1,0,1.2563
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,16,8192,1,0,60.0739
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,32,512,1,0,2.7437
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,16,4096,1,0,25.5687
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,32,2048,1,0,19.8029
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,32,1536,1,0,12.9053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,64,16,1,0,0.2248
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,64,1,1,0,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,32,3072,1,0,35.4328
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,32,1024,1,0,7.0415
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,64,32,1,0,0.3797
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,64,128,1,0,1.1775
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,64,512,1,0,5.8485
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,32,4096,1,0,52.0044
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,64,256,1,0,2.3892
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,64,64,1,0,0.6608
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,128,1,1,0,0.1102
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,128,64,1,0,1.1733
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,64,1536,1,0,25.9317
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,128,128,1,0,2.2397
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,128,32,1,0,0.6569
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,64,2048,1,0,39.3702
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,64,1024,1,0,14.4250
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,128,16,1,0,0.3798
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,256,1,1,0,0.1279
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,128,512,1,0,12.1030
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,128,256,1,0,5.0324
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,256,16,1,0,0.6582
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,256,32,1,0,1.1641
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,256,128,1,0,4.6252
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,256,256,1,0,10.2921
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,256,64,1,0,2.2217
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,16,1,0,0.1009
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,256,512,1,0,24.0461
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,32,1,0,0.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,64,1,0,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,128,1024,1,0,28.6629
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,128,1,0,0.1096
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,1024,1,0,0.2987
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,2048,1,0,0.6869
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,1536,1,0,0.4671
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,1,1,0,0.0888
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,256,1,0,0.1305
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,3072,1,0,1.1353
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,4096,1,0,1.6181
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,8192,1,0,3.6155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,512,1,0,0.1729
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,10240,1,0,4.7019
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,6144,1,0,2.6187
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,32768,1,0,20.7654
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,12288,1,0,5.8383
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,32,1,0,0.1050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,16,1,0,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,1,1,0,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,16384,1,0,8.3617
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,128,1,0,0.1261
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,1024,1,0,0.5192
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,1536,1,0,0.8377
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,2048,1,0,1.2595
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,64,1,0,0.1097
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,3072,1,0,2.1967
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,256,1,0,0.1645
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,6144,1,0,5.0003
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,512,1,0,0.2583
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,10240,1,0,9.4165
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,8192,1,0,7.1866
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,16384,1,0,17.0444
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,4096,1,0,3.1019
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,12288,1,0,11.7766
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,32,1,0,0.1098
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,1,1,0,0.0990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,128,1,0,0.1580
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,256,1,0,0.2370
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,1024,1,0,0.9385
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,1536,1,0,1.5755
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,32768,1,0,41.8131
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,512,1,0,0.4411
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,16,1,0,0.1036
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,3072,1,0,4.2145
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,4096,1,0,6.0654
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,2048,1,0,2.3827
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,64,1,0,0.1257
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,6144,1,0,10.1372
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,8,1,1,0,0.0976
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,8192,1,0,14.5370
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,12288,1,0,23.7201
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,8,16,1,0,0.1093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,8,32,1,0,0.1259
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,10240,1,0,19.1106
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,16384,1,0,34.4054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,8,64,1,0,0.1569
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,8,256,1,0,0.4013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,8,512,1,0,0.7729
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,8,1024,1,0,1.7308
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,8,1536,1,0,3.0497
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,8,128,1,0,0.2237
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,8,2048,1,0,4.6751
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,8,3072,1,0,8.5248
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,8,6144,1,0,20.6735
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,32768,1,0,83.3921
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,8,4096,1,0,12.6662
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,8,8192,1,0,29.5376
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,16,1,1,0,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,16,16,1,0,0.1258
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,8,10240,1,0,38.4002
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,16,32,1,0,0.1562
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,16,64,1,0,0.2238
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,8,12288,1,0,47.4545
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,16,256,1,0,0.6999
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,16,512,1,0,1.4152
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,16,1536,1,0,6.2121
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,16,128,1,0,0.3772
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,16,2048,1,0,9.6618
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,8,16384,1,0,69.4791
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,16,3072,1,0,17.2999
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,16,1024,1,0,3.3892
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,16,4096,1,0,25.8684
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,32,32,1,0,0.2216
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,32,16,1,0,0.1583
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,32,64,1,0,0.3771
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,16,6144,1,0,41.9246
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,32,1,1,0,0.1027
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,32,128,1,0,0.6503
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,32,256,1,0,1.2554
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,32,512,1,0,2.7823
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,16,8192,1,0,59.3491
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,32,1024,1,0,6.9911
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,32,1536,1,0,12.5734
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,64,1,1,0,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,32,2048,1,0,19.7890
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,64,32,1,0,0.3780
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,64,64,1,0,0.6534
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,64,256,1,0,2.3924
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,64,128,1,0,1.1562
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,32,4096,1,0,51.5926
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,32,3072,1,0,35.0677
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,64,512,1,0,5.7687
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,64,16,1,0,0.2219
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,64,1024,1,0,14.3621
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,128,16,1,0,0.3777
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,128,128,1,0,2.2155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,64,1536,1,0,25.7377
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,128,64,1,0,1.1554
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,128,1,1,0,0.1112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,128,256,1,0,4.9846
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,64,2048,1,0,39.8080
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,128,32,1,0,0.6529
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,128,512,1,0,11.8593
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,256,1,1,0,0.1278
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,256,16,1,0,0.6552
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,256,64,1,0,2.2172
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,256,32,1,0,1.1560
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,128,1024,1,0,28.7437
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,256,128,1,0,4.6427
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,256,256,1,0,10.0703
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1,16,1,0,0.1854
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1,32,1,0,0.1873
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1,64,1,0,0.1934
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,256,512,1,0,23.6060
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1,256,1,0,0.2430
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1,1,1,0,0.1739
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1,128,1,0,0.2082
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1,1024,1,0,0.5984
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1,3072,1,0,1.7525
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1,4096,1,0,2.4487
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1,512,1,0,0.3336
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1,6144,1,0,3.7171
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1,10240,1,0,6.4593
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1,8192,1,0,5.0824
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1,12288,1,0,7.9209
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1,2048,1,0,1.1503
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1,1536,1,0,0.8239
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,2,1,1,0,0.1810
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,2,16,1,0,0.1877
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,2,32,1,0,0.1920
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,2,64,1,0,0.2088
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1,16384,1,0,11.1108
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,2,256,1,0,0.3285
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,2,512,1,0,0.5682
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,2,128,1,0,0.2411
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,2,1536,1,0,1.5469
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1,32768,1,0,26.2936
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,2,3072,1,0,3.3949
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,2,2048,1,0,2.1711
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,2,4096,1,0,4.6388
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,2,1024,1,0,1.0524
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,2,8192,1,0,9.9707
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,2,6144,1,0,7.2111
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,2,12288,1,0,15.9431
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,4,32,1,0,0.2081
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,2,10240,1,0,12.9394
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,4,1,1,0,0.1830
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,4,16,1,0,0.1925
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,2,16384,1,0,22.4136
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,4,128,1,0,0.3247
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,4,64,1,0,0.2408
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,2,32768,1,0,52.9208
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,4,256,1,0,0.5554
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,4,512,1,0,1.0000
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,4,1024,1,0,1.9699
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,4,2048,1,0,4.1083
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,4,1536,1,0,2.9769
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,4,4096,1,0,9.0926
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,4,3072,1,0,6.5853
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,4,6144,1,0,14.4898
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,4,10240,1,0,26.1377
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,4,8192,1,0,20.1764
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,8,16,1,0,0.2085
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,8,1,1,0,0.1847
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,4,12288,1,0,31.9840
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,8,32,1,0,0.2422
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,8,64,1,0,0.3238
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,8,128,1,0,0.5533
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,8,256,1,0,0.9740
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,4,16384,1,0,45.3748
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,8,512,1,0,1.8660
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,8,1536,1,0,5.7463
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,8,1024,1,0,3.7100
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,8,3072,1,0,13.1787
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,8,4096,1,0,18.4870
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,8,2048,1,0,8.0398
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,4,32768,1,0,107.8257
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,8,6144,1,0,29.6166
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,16,1,1,0,0.1870
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,8,8192,1,0,40.5411
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,16,16,1,0,0.2421
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,16,32,1,0,0.3243
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,16,64,1,0,0.5532
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,16,128,1,0,0.9691
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,8,10240,1,0,52.6617
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,16,256,1,0,1.8175
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,8,12288,1,0,64.9552
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,16,512,1,0,3.5154
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,16,1024,1,0,7.2374
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,16,1536,1,0,11.5907
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,8,16384,1,0,92.0835
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,16,2048,1,0,16.2990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,32,16,1,0,0.3272
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,16,3072,1,0,26.3641
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,32,1,1,0,0.1927
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,32,64,1,0,0.9688
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,32,32,1,0,0.5520
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,16,4096,1,0,37.4505
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,32,128,1,0,1.8044
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,16,6144,1,0,60.3051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,32,512,1,0,6.8474
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,32,256,1,0,3.4035
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,32,1024,1,0,14.7190
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,64,1,1,0,0.2053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,16,8192,1,0,82.9498
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,64,16,1,0,0.5580
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,32,2048,1,0,33.1012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,32,1536,1,0,23.4184
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,64,32,1,0,0.9706
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,64,64,1,0,1.8102
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,64,128,1,0,3.3947
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,32,3072,1,0,54.6933
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,64,256,1,0,6.6232
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,64,512,1,0,13.8742
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,128,1,1,0,0.2557
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,64,1024,1,0,30.1721
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,128,32,1,0,1.8049
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,32,4096,1,0,76.2138
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,128,16,1,0,0.9789
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,128,64,1,0,3.3896
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,64,1536,1,0,48.3513
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,128,128,1,0,6.6065
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,256,1,1,0,0.2923
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,128,256,1,0,13.5425
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,128,512,1,0,28.0348
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,256,16,1,0,1.8365
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,64,2048,1,0,67.3280
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,256,64,1,0,6.6051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,256,32,1,0,3.4012
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,256,128,1,0,13.4557
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,128,1024,1,0,61.4418
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1,32,1,0,0.1465
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1,16,1,0,0.1622
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1,1,1,0,0.1326
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,256,256,1,0,27.6757
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1,128,1,0,0.1652
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1,64,1,0,0.1547
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1,512,1,0,0.2877
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1,1024,1,0,0.5654
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1,1536,1,0,0.9049
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,256,512,1,0,56.6300
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1,3072,1,0,2.3156
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1,4096,1,0,3.3257
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1,2048,1,0,1.3625
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1,6144,1,0,5.2955
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1,8192,1,0,7.3040
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1,16384,1,0,15.9811
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,2,1,1,0,0.1402
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1,10240,1,0,9.3750
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,2,16,1,0,0.3912
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1,256,1,0,0.2100
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,2,64,1,0,0.1650
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1,12288,1,0,11.5177
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,2,32,1,0,0.1559
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,2,1024,1,0,1.0323
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1,32768,1,0,36.5013
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,2,256,1,0,0.2684
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,2,512,1,0,0.4978
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,2,1536,1,0,1.7321
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,2,3072,1,0,4.5317
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,2,2048,1,0,2.6293
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,2,4096,1,0,6.4531
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,2,8192,1,0,14.4858
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,2,6144,1,0,10.4268
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,2,10240,1,0,18.7905
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,4,1,1,0,0.1420
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,2,128,1,0,0.1897
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,4,16,1,0,0.1560
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,2,12288,1,0,23.0654
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,4,32,1,0,0.1646
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,4,64,1,0,0.1896
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,4,256,1,0,0.4438
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,4,1024,1,0,1.9572
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,4,512,1,0,0.8902
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,2,32768,1,0,73.0435
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,2,16384,1,0,32.2087
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,4,1536,1,0,3.3807
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,4,2048,1,0,5.0948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,4,3072,1,0,8.8738
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,4,128,1,0,0.2487
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,4,4096,1,0,12.7633
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,4,6144,1,0,20.8487
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,8,1,1,0,0.1431
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,4,10240,1,0,37.6691
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,8,16,1,0,0.1672
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,8,64,1,0,0.2486
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,8,32,1,0,0.1916
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,8,128,1,0,0.4039
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,4,8192,1,0,29.1243
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,8,256,1,0,0.7873
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,4,16384,1,0,64.6953
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,4,12288,1,0,46.2937
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,8,512,1,0,1.6783
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,8,1024,1,0,3.7310
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,8,1536,1,0,6.5691
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,8,2048,1,0,10.0636
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,8,3072,1,0,17.8075
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,8,6144,1,0,41.8926
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,4,32768,1,0,145.9638
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,16,1,1,0,0.1479
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,8,4096,1,0,25.8237
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,8,8192,1,0,58.4971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,16,16,1,0,0.1909
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,16,32,1,0,0.2473
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,16,64,1,0,0.4056
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,8,10240,1,0,75.2110
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,16,128,1,0,0.7180
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,16,512,1,0,3.2079
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,16,256,1,0,1.4684
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,8,12288,1,0,92.2828
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,16,1024,1,0,7.3141
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,16,1536,1,0,13.1461
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,16,2048,1,0,20.2317
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,32,1,1,0,0.1507
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,8,16384,1,0,128.8051
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,16,3072,1,0,35.8497
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,32,16,1,0,0.2483
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,32,32,1,0,0.4044
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,32,64,1,0,0.7120
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,16,4096,1,0,51.8264
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,32,128,1,0,1.3211
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,32,256,1,0,2.7487
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,32,512,1,0,6.1470
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,32,1024,1,0,14.8123
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,32,1536,1,0,26.3728
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,64,1,1,0,0.1590
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,16,6144,1,0,83.6125
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,64,16,1,0,0.4093
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,16,8192,1,0,116.8670
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,32,2048,1,0,40.6911
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,64,32,1,0,0.7107
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,64,128,1,0,2.4457
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,64,256,1,0,5.3236
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,32,3072,1,0,71.7684
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,64,64,1,0,1.3197
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,64,512,1,0,12.5361
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,128,16,1,0,0.7202
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,128,32,1,0,1.3148
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,128,1,1,0,0.1777
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,32,4096,1,0,103.4629
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,64,1024,1,0,29.8512
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,128,64,1,0,2.4462
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,64,1536,1,0,52.9468
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,128,128,1,0,4.7871
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,128,256,1,0,10.7945
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,256,16,1,0,1.3283
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,256,32,1,0,2.4261
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,128,512,1,0,25.4492
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,256,1,1,0,0.2164
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,64,2048,1,0,80.9840
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,256,128,1,0,9.6994
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,256,64,1,0,4.7807
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1,1,1,0,0.1050
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,256,256,1,0,21.8192
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1,32,1,0,0.1215
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,128,1024,1,0,59.4158
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1,16,1,0,0.1176
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1,64,1,0,0.1279
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1,256,1,0,0.1644
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1,512,1,0,0.2146
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1,1024,1,0,0.3718
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1,1536,1,0,0.5676
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,256,512,1,0,50.0174
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1,128,1,0,0.1368
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1,4096,1,0,1.9838
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1,6144,1,0,3.1576
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1,8192,1,0,4.3603
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1,12288,1,0,6.9525
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1,3072,1,0,1.3944
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1,16384,1,0,9.8606
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,2,1,1,0,0.1163
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1,2048,1,0,0.8440
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,2,32,1,0,0.1278
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,2,64,1,0,0.1383
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,2,128,1,0,0.1576
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1,32768,1,0,23.8312
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,2,512,1,0,0.3350
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1,10240,1,0,5.6298
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,2,16,1,0,0.1215
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,2,256,1,0,0.2034
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,2,1024,1,0,0.6705
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,2,1536,1,0,1.0775
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,2,2048,1,0,1.5820
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,2,6144,1,0,6.1349
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,2,3072,1,0,2.7048
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,2,4096,1,0,3.8222
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,2,8192,1,0,8.6165
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,4,1,1,0,0.1151
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,2,12288,1,0,13.9812
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,2,10240,1,0,11.3101
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,4,64,1,0,0.1583
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,4,16,1,0,0.1259
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,2,32768,1,0,47.8752
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,2,16384,1,0,19.8834
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,4,32,1,0,0.1365
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,4,128,1,0,0.1953
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,4,256,1,0,0.3115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,4,512,1,0,0.5919
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,4,2048,1,0,3.0211
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,4,1024,1,0,1.2483
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,4,4096,1,0,7.5340
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,4,6144,1,0,12.3404
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,4,1536,1,0,2.0677
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,4,10240,1,0,22.7376
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,4,3072,1,0,5.2301
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,4,12288,1,0,28.0159
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,8,1,1,0,0.1155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,8,16,1,0,0.1360
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,4,8192,1,0,17.4432
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,8,64,1,0,0.1944
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,4,16384,1,0,40.0465
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,8,32,1,0,0.1589
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,8,128,1,0,0.2938
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,8,256,1,0,0.5501
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,8,1024,1,0,2.3217
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,8,1536,1,0,3.9629
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,8,2048,1,0,5.9371
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,8,4096,1,0,15.2043
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,4,32768,1,0,95.8102
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,8,512,1,0,1.1117
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,8,6144,1,0,24.7824
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,8,8192,1,0,35.0687
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,8,3072,1,0,10.5102
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,16,1,1,0,0.1189
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,16,16,1,0,0.1583
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,16,32,1,0,0.1937
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,8,10240,1,0,45.3950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,16,64,1,0,0.2927
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,16,128,1,0,0.5094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,8,12288,1,0,56.0396
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,16,512,1,0,2.0558
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,16,256,1,0,1.0005
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,16,1536,1,0,7.9793
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,16,2048,1,0,11.9958
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,8,16384,1,0,79.6305
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,16,3072,1,0,21.1092
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,16,1024,1,0,4.5480
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,32,1,1,0,0.1238
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,32,16,1,0,0.1959
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,16,4096,1,0,30.7029
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,32,32,1,0,0.2938
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,32,64,1,0,0.5095
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,32,128,1,0,0.9218
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,32,512,1,0,3.9343
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,16,8192,1,0,70.1183
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,32,256,1,0,1.8224
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,32,1536,1,0,16.0708
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,16,6144,1,0,49.8612
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,32,1024,1,0,9.3211
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,64,1,1,0,0.1292
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,32,2048,1,0,24.2446
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,64,16,1,0,0.2952
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,64,128,1,0,1.6789
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,64,32,1,0,0.5107
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,64,64,1,0,0.9259
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,32,3072,1,0,42.3150
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,64,512,1,0,8.0472
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,32,4096,1,0,60.9239
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,64,256,1,0,3.5394
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,64,1024,1,0,18.7559
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,128,16,1,0,0.5112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,128,32,1,0,0.9192
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,128,1,1,0,0.1421
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,128,64,1,0,1.6748
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,64,1536,1,0,32.4363
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,128,256,1,0,7.1978
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,64,2048,1,0,48.2787
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,256,1,1,0,0.1705
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,128,512,1,0,16.4329
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,128,128,1,0,3.2656
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,256,32,1,0,1.6791
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,256,128,1,0,6.6479
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,256,16,1,0,0.9246
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,128,1024,1,0,37.1604
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,256,64,1,0,3.2341
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1,16,1,0,0.1115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,256,256,1,0,14.6844
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1,32,1,0,0.1115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1,128,1,0,0.1213
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1,256,1,0,0.1434
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1,1,1,0,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1,64,1,0,0.1155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,256,512,1,0,32.6048
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1,1024,1,0,0.2852
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1,2048,1,0,0.5844
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1,1536,1,0,0.4120
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1,4096,1,0,1.3294
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1,6144,1,0,2.0945
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1,8192,1,0,2.8874
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1,12288,1,0,4.6935
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1,512,1,0,0.1804
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1,16384,1,0,6.7857
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1,10240,1,0,3.7622
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,2,1,1,0,0.1071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1,32768,1,0,17.6228
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,2,64,1,0,0.1216
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,2,16,1,0,0.1097
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,2,128,1,0,0.1381
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1,3072,1,0,0.9340
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,2,512,1,0,0.2610
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,2,1024,1,0,0.4948
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,2,2048,1,0,1.0660
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,2,1536,1,0,0.7484
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,2,32,1,0,0.1153
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,2,256,1,0,0.1778
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,2,4096,1,0,2.4855
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,2,10240,1,0,7.5484
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,2,6144,1,0,4.0080
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,2,3072,1,0,1.7845
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,4,1,1,0,0.1075
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,2,12288,1,0,9.4797
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,4,16,1,0,0.1141
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,4,32,1,0,0.1214
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,2,16384,1,0,13.7356
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,4,64,1,0,0.1391
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,2,32768,1,0,35.2877
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,4,256,1,0,0.2484
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,4,512,1,0,0.4522
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,4,128,1,0,0.1697
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,4,1536,1,0,1.4112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,4,2048,1,0,1.9965
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,4,3072,1,0,3.3875
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,4,6144,1,0,8.0831
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,4,8192,1,0,11.5686
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,4,10240,1,0,15.2765
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,4,1024,1,0,0.8861
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,4,12288,1,0,19.0411
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,8,1,1,0,0.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,4,16384,1,0,27.6510
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,4,4096,1,0,4.8843
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,8,16,1,0,0.1215
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,8,32,1,0,0.1382
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,8,128,1,0,0.2394
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,8,64,1,0,0.1684
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,8,512,1,0,0.8175
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,8,1024,1,0,1.6236
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,8,1536,1,0,2.6543
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,4,32768,1,0,70.3767
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,8,256,1,0,0.4309
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,8,3072,1,0,6.8705
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,8,4096,1,0,9.9712
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,8,6144,1,0,16.3213
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,8,10240,1,0,30.4084
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,16,1,1,0,0.1125
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,8,8192,1,0,23.3867
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,16,16,1,0,0.1395
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,8,12288,1,0,37.7483
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,16,32,1,0,0.1693
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,16,128,1,0,0.4080
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,16,256,1,0,0.7584
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,8,16384,1,0,55.3990
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,16,64,1,0,0.2385
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,16,512,1,0,1.4679
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,16,1024,1,0,3.1501
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,16,1536,1,0,5.3654
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,16,2048,1,0,7.9283
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,16,3072,1,0,13.8820
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,16,4096,1,0,20.1861
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,32,1,1,0,0.1134
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,32,16,1,0,0.1705
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,32,32,1,0,0.2395
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,16,8192,1,0,46.4275
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,32,256,1,0,1.3682
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,16,6144,1,0,32.6681
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,32,128,1,0,0.7181
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,32,512,1,0,2.8429
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,32,1024,1,0,6.5360
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,32,1536,1,0,10.8888
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,32,2048,1,0,16.0635
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,64,1,1,0,0.1159
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,64,16,1,0,0.2416
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,32,3072,1,0,27.6467
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,64,32,1,0,0.4085
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,64,64,1,0,0.7206
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,64,128,1,0,1.2971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,32,4096,1,0,39.9148
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,64,256,1,0,2.6300
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,64,512,1,0,5.8536
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,64,1024,1,0,13.1727
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,128,16,1,0,0.4126
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,128,1,1,0,0.1239
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,64,1536,1,0,21.9331
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,128,32,1,0,0.7197
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,128,64,1,0,1.2913
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,64,2048,1,0,31.8653
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,128,128,1,0,2.4848
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,128,256,1,0,5.3982
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,128,512,1,0,11.9649
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,256,1,1,0,0.1475
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,256,32,1,0,1.3031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,256,16,1,0,0.7235
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,128,1024,1,0,26.0816
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,256,64,1,0,2.4633
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1,32,1,0,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1,64,1,0,0.1112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1,1,1,0,0.0951
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,256,512,1,0,23.7397
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,256,256,1,0,10.9924
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1,16,1,0,0.1059
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1,128,1,0,0.1155
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1,256,1,0,0.1329
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1,512,1,0,0.1710
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1,2048,1,0,0.5460
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1,3072,1,0,0.8760
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1,1024,1,0,0.2641
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1,4096,1,0,1.2509
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1,1536,1,0,0.3838
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1,6144,1,0,1.9863
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1,8192,1,0,2.7413
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,32,64,1,0,0.4114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1,12288,1,0,4.4692
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1,16384,1,0,6.4952
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1,10240,1,0,3.5832
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,2,16,1,0,0.1071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,2,32,1,0,0.1112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1,32768,1,0,17.0326
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,2,64,1,0,0.1153
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,2,128,1,0,0.1296
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,2,256,1,0,0.1641
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,2,512,1,0,0.2443
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,2,1024,1,0,0.4503
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,2,1536,1,0,0.6898
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,2,1,1,0,0.1014
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,2,4096,1,0,2.3303
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,2,3072,1,0,1.6704
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,2,2048,1,0,0.9934
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,2,10240,1,0,7.2057
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,2,12288,1,0,9.0076
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,2,16384,1,0,13.1457
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,2,8192,1,0,5.3865
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,2,6144,1,0,3.7895
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,4,16,1,0,0.1099
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,4,1,1,0,0.1010
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,2,32768,1,0,34.2927
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,4,64,1,0,0.1299
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,4,256,1,0,0.2325
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,4,512,1,0,0.4111
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,4,1024,1,0,0.8121
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,4,2048,1,0,1.8329
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,4,32,1,0,0.1156
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,4,1536,1,0,1.2866
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,4,4096,1,0,4.5897
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,4,3072,1,0,3.1645
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,256,128,1,0,5.1143
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,4,8192,1,0,11.0499
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,4,10240,1,0,14.5484
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,4,6144,1,0,7.6921
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,8,1,1,0,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,4,16384,1,0,26.6084
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,8,16,1,0,0.1152
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,4,12288,1,0,18.0795
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,8,32,1,0,0.1300
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,8,64,1,0,0.1586
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,8,128,1,0,0.2203
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,8,512,1,0,0.7296
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,8,256,1,0,0.3895
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,4,32768,1,0,68.0827
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,8,1536,1,0,2.4251
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,8,1024,1,0,1.4648
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,8,2048,1,0,3.5727
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,8,3072,1,0,6.3947
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,8,4096,1,0,9.3820
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,8,8192,1,0,22.1955
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,8,10240,1,0,29.0053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,16,1,1,0,0.1057
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,8,6144,1,0,15.4767
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,16,16,1,0,0.1299
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,8,12288,1,0,36.0282
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,16,32,1,0,0.1613
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,16,128,1,0,0.3706
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,16,256,1,0,0.6837
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,16,64,1,0,0.2219
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,8,16384,1,0,53.0565
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,16,512,1,0,1.3039
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,16,1536,1,0,4.9053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,16,2048,1,0,7.2890
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,16,1024,1,0,2.8221
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,16,3072,1,0,12.9539
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,16,4096,1,0,18.8873
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,32,1,1,0,0.1071
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,32,32,1,0,0.2210
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,32,16,1,0,0.1582
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,32,64,1,0,0.3711
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,32,128,1,0,0.6454
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,16,8192,1,0,44.4098
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,32,256,1,0,1.2062
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,16,6144,1,0,31.0894
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,32,512,1,0,2.4890
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,32,1024,1,0,5.8430
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,32,1536,1,0,10.0055
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,64,16,1,0,0.2229
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,32,2048,1,0,14.7960
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,32,3072,1,0,26.0441
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,64,128,1,0,1.1408
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,64,1,1,0,0.1097
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,64,512,1,0,5.1606
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,64,64,1,0,0.6472
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,32,4096,1,0,37.4800
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,64,256,1,0,2.3054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,64,1024,1,0,12.0304
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,128,16,1,0,0.3701
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,64,1536,1,0,20.1949
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,128,1,1,0,0.1173
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,128,32,1,0,0.6447
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,64,2048,1,0,29.5408
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,128,64,1,0,1.1354
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,128,128,1,0,2.1943
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,128,256,1,0,4.7915
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,256,16,1,0,0.6451
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,128,512,1,0,10.6094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,128,1024,1,0,23.5313
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,256,32,1,0,1.1444
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,256,64,1,0,2.1958
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,256,128,1,0,4.5204
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,256,1,1,0,0.1347
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,256,256,1,0,9.7734
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1,32,1,0,0.1037
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1,64,1,0,0.1077
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1,1,1,0,0.0940
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1,16,1,0,0.1036
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,256,512,1,0,20.9539
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1,128,1,0,0.1135
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1,256,1,0,0.1298
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1,1024,1,0,0.2583
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1,1536,1,0,0.3720
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1,3072,1,0,0.8565
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1,2048,1,0,0.5335
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1,4096,1,0,1.2119
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1,8192,1,0,2.6823
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1,6144,1,0,1.9363
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1,12288,1,0,4.3848
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1,16384,1,0,6.3669
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1,10240,1,0,3.5069
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,2,16,1,0,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1,32768,1,0,16.7026
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1,512,1,0,0.1663
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,2,32,1,0,0.1091
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,2,64,1,0,0.1133
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,2,1,1,0,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,2,256,1,0,0.1606
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,2,512,1,0,0.2360
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,2,1024,1,0,0.4423
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,2,128,1,0,0.1273
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,2,1536,1,0,0.6643
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,2,4096,1,0,2.2669
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,2,3072,1,0,1.6198
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,64,32,1,0,0.3691
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,2,8192,1,0,5.2894
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,2,6144,1,0,3.6912
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,2,12288,1,0,8.8467
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,2,16384,1,0,12.8919
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,2,2048,1,0,0.9542
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,2,10240,1,0,7.0239
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,4,32,1,0,0.1148
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,4,1,1,0,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,4,64,1,0,0.1275
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,4,256,1,0,0.2223
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,4,128,1,0,0.1560
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,4,512,1,0,0.3950
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,4,1024,1,0,0.7689
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,2,32768,1,0,33.7275
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,4,1536,1,0,1.2342
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,4,2048,1,0,1.7635
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,4,16,1,0,0.1075
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,4,4096,1,0,4.4357
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,4,3072,1,0,3.0725
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,4,6144,1,0,7.4566
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,8,1,1,0,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,4,12288,1,0,17.6648
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,8,16,1,0,0.1133
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,4,10240,1,0,14.2237
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,8,32,1,0,0.1276
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,4,8192,1,0,10.7124
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,8,128,1,0,0.2139
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,8,256,1,0,0.3755
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,4,32768,1,0,67.4079
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,8,64,1,0,0.1558
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,8,512,1,0,0.6933
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,8,2048,1,0,3.4069
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,4,16384,1,0,26.0971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,8,1536,1,0,2.3092
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,8,1024,1,0,1.3989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,8,6144,1,0,15.1271
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,8,4096,1,0,9.0672
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,8,8192,1,0,21.6921
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,8,3072,1,0,6.2337
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,16,1,1,0,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,8,12288,1,0,35.3733
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,16,32,1,0,0.1555
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,16,64,1,0,0.2138
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,16,128,1,0,0.3551
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,8,10240,1,0,28.3629
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,16,256,1,0,0.6493
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,8,16384,1,0,51.7558
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,16,1024,1,0,2.6906
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,16,512,1,0,1.2336
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,16,2048,1,0,7.0187
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,16,1536,1,0,4.7253
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,16,4096,1,0,18.3626
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,16,3072,1,0,12.4830
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,32,16,1,0,0.1545
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,32,1,1,0,0.1072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,32,32,1,0,0.2126
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,32,64,1,0,0.3557
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,16,6144,1,0,30.1363
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,16,8192,1,0,43.0549
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,32,128,1,0,0.6108
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,32,256,1,0,1.1462
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,32,512,1,0,2.3672
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,32,1024,1,0,5.6272
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,32,1536,1,0,9.5290
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,64,1,1,0,0.1078
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,64,16,1,0,0.2125
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,64,64,1,0,0.6119
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,32,4096,1,0,36.4587
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,32,2048,1,0,14.2549
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,64,32,1,0,0.3574
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,32,3072,1,0,25.0445
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,64,128,1,0,1.0797
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,64,256,1,0,2.1885
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,64,512,1,0,4.8941
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,128,1,1,0,0.1133
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,64,1536,1,0,19.2600
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,128,16,1,0,0.3567
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,64,1024,1,0,11.3730
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,128,32,1,0,0.6154
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,128,64,1,0,1.0717
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,128,128,1,0,2.0381
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,64,2048,1,0,28.4885
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,128,512,1,0,10.0116
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,256,1,1,0,0.1292
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,128,256,1,0,4.5284
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,256,16,1,0,0.6138
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,128,1024,1,0,22.4840
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,256,64,1,0,2.0604
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,256,32,1,0,1.0727
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,256,128,1,0,4.2758
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1,1,1,0,0.0908
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1,16,1,0,0.1029
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,256,512,1,0,19.7269
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1,32,1,0,0.1053
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1,128,1,0,0.1116
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,256,256,1,0,9.3666
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1,512,1,0,0.1613
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1,256,1,0,0.1286
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1,64,1,0,0.1055
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1,1536,1,0,0.3693
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1,2048,1,0,0.5247
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1,3072,1,0,0.8466
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1,6144,1,0,1.9184
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1,4096,1,0,1.2022
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1,12288,1,0,4.3254
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1,10240,1,0,3.4647
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,2,16,1,0,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1,32768,1,0,16.6467
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,2,1,1,0,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1,16384,1,0,6.2978
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1,1024,1,0,0.2550
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,2,64,1,0,0.1116
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,2,256,1,0,0.1554
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,2,128,1,0,0.1245
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,2,512,1,0,0.2323
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,2,32,1,0,0.1076
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,2,2048,1,0,0.9365
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,2,3072,1,0,1.6005
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,2,4096,1,0,2.2421
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,2,6144,1,0,3.6454
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,2,1024,1,0,0.4305
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,2,10240,1,0,6.9509
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,2,1536,1,0,0.6494
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,2,12288,1,0,8.7498
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,2,16384,1,0,12.8287
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,2,8192,1,0,5.1901
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,4,1,1,0,0.0991
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,4,64,1,0,0.1257
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,4,16,1,0,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,2,32768,1,0,33.4756
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,4,32,1,0,0.1124
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,4,128,1,0,0.1518
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,4,256,1,0,0.2210
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,4,1024,1,0,0.7601
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,4,3072,1,0,3.0114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,4,1536,1,0,1.2161
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,4,2048,1,0,1.7267
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,4,6144,1,0,7.3589
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,4,4096,1,0,4.3930
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,4,8192,1,0,10.6520
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,4,10240,1,0,14.0674
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,8,1,1,0,0.1011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,4,12288,1,0,17.5119
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,4,16384,1,0,25.7650
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,8,16,1,0,0.1133
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,8,32,1,0,0.1259
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,8,64,1,0,0.1522
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,8,256,1,0,0.3664
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,8,512,1,0,0.6780
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,8,1024,1,0,1.3635
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,4,32768,1,0,66.5283
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,8,128,1,0,0.2099
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,8,2048,1,0,3.3557
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,8,3072,1,0,6.1278
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1,8192,1,0,2.6629
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,8,4096,1,0,8.9315
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,8,10240,1,0,28.0474
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,16,1,1,0,0.1014
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,8,8192,1,0,21.4325
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,16,16,1,0,0.1259
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,8,12288,1,0,35.0153
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,16,64,1,0,0.2118
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,8,16384,1,0,51.5624
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,16,32,1,0,0.1514
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,16,128,1,0,0.3490
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,16,256,1,0,0.6363
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,16,512,1,0,1.1976
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,16,1024,1,0,2.6237
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,16,1536,1,0,4.6538
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,16,2048,1,0,6.8896
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,16,3072,1,0,12.3655
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,16,4096,1,0,18.0914
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,32,1,1,0,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,16,6144,1,0,29.7708
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,32,16,1,0,0.1503
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,32,32,1,0,0.2121
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,16,8192,1,0,42.8780
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,32,64,1,0,0.3491
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,32,256,1,0,1.1140
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,32,1024,1,0,5.4400
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,32,512,1,0,2.3027
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,32,2048,1,0,13.9922
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,64,1,1,0,0.1072
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,32,3072,1,0,24.6752
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,64,16,1,0,0.2131
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,32,4096,1,0,35.9455
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,64,32,1,0,0.3512
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,64,64,1,0,0.5965
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,64,128,1,0,1.0446
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,64,256,1,0,2.1094
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,64,512,1,0,4.8210
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,64,1024,1,0,11.1022
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,64,1536,1,0,18.8798
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,128,1,1,0,0.1136
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,64,2048,1,0,27.7539
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,128,32,1,0,0.5972
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,128,64,1,0,1.0419
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,128,128,1,0,1.9971
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,128,256,1,0,4.4006
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,256,1,1,0,0.1266
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,256,16,1,0,0.6011
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,256,32,1,0,1.0465
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,128,1024,1,0,21.9586
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,256,64,1,0,1.9767
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,256,128,1,0,4.1876
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,256,256,1,0,9.0163
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1,1,1,0,0.0888
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,256,512,1,0,19.2461
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1,16,1,0,0.1033
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1,32,1,0,0.1029
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1,64,1,0,0.1052
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1,128,1,0,0.1112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1,256,1,0,0.1278
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,32,128,1,0,0.5976
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1,512,1,0,0.1634
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1,1024,1,0,0.2514
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1,1536,1,0,0.3663
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1,2048,1,0,0.5256
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1,3072,1,0,0.8401
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1,6144,1,0,1.9066
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1,8192,1,0,2.6416
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1,4096,1,0,1.1886
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1,12288,1,0,4.3103
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1,16384,1,0,6.2540
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,2,1,1,0,0.0995
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1,32768,1,0,16.5592
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,2,32,1,0,0.1059
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,2,64,1,0,0.1112
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,2,128,1,0,0.1257
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,2,256,1,0,0.1563
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,2,512,1,0,0.2299
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,2,1024,1,0,0.4292
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,2,2048,1,0,0.9311
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,2,3072,1,0,1.5845
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,2,6144,1,0,3.6263
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,2,8192,1,0,5.1703
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,2,10240,1,0,6.9176
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,128,512,1,0,9.7742
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,2,12288,1,0,8.6857
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,2,16384,1,0,12.7124
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,4,1,1,0,0.0970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,2,32768,1,0,33.2132
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,4,16,1,0,0.1054
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,4,32,1,0,0.1114
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,4,64,1,0,0.1256
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,4,128,1,0,0.1506
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,4,256,1,0,0.2189
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,4,512,1,0,0.3875
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,4,1024,1,0,0.7511
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,4,1536,1,0,1.2073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,4,2048,1,0,1.7151
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,4,3072,1,0,2.9895
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,4,4096,1,0,4.3536
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,4,6144,1,0,7.3273
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,2,16,1,0,0.1016
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,4,10240,1,0,13.9662
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,4,12288,1,0,17.4519
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,4,16384,1,0,25.6464
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,8,16,1,0,0.1115
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,8,1,1,0,0.0989
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,8,32,1,0,0.1257
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,8,256,1,0,0.3680
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,8,128,1,0,0.2095
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,4,32768,1,0,66.3156
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,8,1536,1,0,2.2487
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,8,2048,1,0,3.3488
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,8,3072,1,0,6.0579
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,8,4096,1,0,8.8659
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,8,6144,1,0,14.7519
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,8,8192,1,0,21.4227
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,8,10240,1,0,27.8755
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,8,12288,1,0,34.6954
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,16,1,1,0,0.1032
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,8,16384,1,0,51.0119
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,16,16,1,0,0.1257
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,16,32,1,0,0.1510
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,16,64,1,0,0.2077
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,16,128,1,0,0.3469
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,16,256,1,0,0.6257
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,16,1024,1,0,2.6205
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,16,1536,1,0,4.5503
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,16,2048,1,0,6.8248
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,16,3072,1,0,12.3065
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,4,8192,1,0,10.5926
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,16,4096,1,0,17.9718
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,16,6144,1,0,29.5776
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,32,1,1,0,0.1031
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,32,16,1,0,0.1501
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,16,8192,1,0,42.6064
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,32,32,1,0,0.2075
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,32,64,1,0,0.3479
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,8,64,1,0,0.1512
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,32,128,1,0,0.5913
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,32,256,1,0,1.1127
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,32,512,1,0,2.2765
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,32,1024,1,0,5.3851
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,8,512,1,0,0.6733
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,32,2048,1,0,13.8562
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,32,1536,1,0,9.2319
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,64,1,1,0,0.1055
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,32,3072,1,0,24.6967
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,64,16,1,0,0.2088
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,64,64,1,0,0.5905
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,8,1024,1,0,1.3485
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,32,4096,1,0,35.9073
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,64,32,1,0,0.3499
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,64,128,1,0,1.0390
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,64,512,1,0,4.7118
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,64,256,1,0,2.1108
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,64,1024,1,0,10.9660
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,128,1,1,0,0.1118
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,128,32,1,0,0.5913
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,128,16,1,0,0.3490
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,128,128,1,0,1.9495
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,128,64,1,0,1.0467
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,64,2048,1,0,27.7075
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,128,512,1,0,9.6101
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,64,1536,1,0,18.6797
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,256,1,1,0,0.1258
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,256,16,1,0,0.5970
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,128,1024,1,0,21.6962
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,128,256,1,0,4.3897
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,256,64,1,0,1.9431
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,256,256,1,0,8.8846
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,256,128,1,0,4.0829
TRTLLM,1.2.0rc5,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,256,512,1,0,18.9554
