framework,version,device,op_name,kernel_source,moe_dtype,num_tokens,hidden_size,inter_size,topk,num_experts,moe_tp_size,moe_ep_size,distribution,latency
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,8,2,power_law_1.01,0.06284160017967225
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,8,2,power_law_1.01,0.06423680186271667
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,8,2,power_law_1.01,0.06666880249977111
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,8,2,power_law_1.01,0.0762112021446228
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,8,2,power_law_1.01,0.08512639999389648
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,8,2,power_law_1.01,0.08989440202713013
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,8,2,power_law_1.01,0.1019327998161316
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,8,2,power_law_1.01,0.10757759809494019
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,8,2,power_law_1.01,0.10740480422973633
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,8,2,power_law_1.01,0.1118016004562378
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,8,2,power_law_1.01,0.11424640417099
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,8,2,power_law_1.01,0.11744639873504639
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,8,2,power_law_1.01,0.12062720060348511
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,8,2,power_law_1.01,0.12300159931182861
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,8,2,power_law_1.01,0.1310528039932251
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,8,2,power_law_1.01,0.13333760499954223
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,8,2,power_law_1.01,0.14201600551605226
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,8,2,power_law_1.01,0.1568511962890625
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,8,2,power_law_1.01,0.17623679637908934
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,8,2,power_law_1.01,0.2106559991836548
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,8,2,power_law_1.01,0.2477184057235718
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,8,2,power_law_1.01,0.32273919582366944
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,8,2,power_law_1.01,0.3955647945404053
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,8,2,power_law_1.01,0.5369344234466553
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,8,2,power_law_1.01,0.6948927879333496
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,8,2,power_law_1.01,1.046713638305664
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,8,2,power_law_1.01,1.3251071929931642
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,2,8,balanced,0.05686399837334951
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,2,8,balanced,0.05793599784374237
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,2,8,balanced,0.05807466804981232
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,2,8,balanced,0.064560001095136
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,2,8,balanced,0.08089600006739299
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,2,8,balanced,0.09656533598899841
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,2,8,balanced,0.09769599636395772
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,2,8,balanced,0.10074133674303691
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,2,8,balanced,0.09700266520182292
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,2,8,balanced,0.10060800115267436
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,2,8,balanced,0.09987733761469524
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,2,8,balanced,0.10043199857076009
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,2,8,balanced,0.10339732964833577
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,2,8,balanced,0.10341333349545796
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,2,8,balanced,0.10941867033640544
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,2,8,balanced,0.10956799983978271
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,2,8,balanced,0.11283733447392781
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,2,8,balanced,0.11924266815185547
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,2,8,balanced,0.1253919998804728
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,2,8,balanced,0.14010133345921835
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,2,8,balanced,0.15227733055750528
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,2,8,balanced,0.18126400311787924
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,2,8,balanced,0.20534400145212808
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,2,8,balanced,0.27740800380706787
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,2,8,balanced,0.3378239870071411
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,2,8,balanced,0.48768532276153564
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,2,8,balanced,0.6301866769790649
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,4,16,power_law_1.2,0.06117759943008423
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,4,16,power_law_1.2,0.06791679859161377
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,4,16,power_law_1.2,0.052262401580810545
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,4,16,power_law_1.2,0.054636800289154054
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,4,16,power_law_1.2,0.05208960175514221
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,4,16,power_law_1.2,0.051577597856521606
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,4,16,power_law_1.2,0.052671998739242554
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,4,16,power_law_1.2,0.05348479747772217
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,4,16,power_law_1.2,0.05372160077095032
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,4,16,power_law_1.2,0.05374720096588135
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,4,16,power_law_1.2,0.054502397775650024
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,4,16,power_law_1.2,0.05607680082321167
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,4,16,power_law_1.2,0.05631359815597534
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,4,16,power_law_1.2,0.05852159857749939
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,4,16,power_law_1.2,0.06529279947280883
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,4,16,power_law_1.2,0.0657151997089386
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,4,16,power_law_1.2,0.0695743978023529
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,4,16,power_law_1.2,0.07959039807319641
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,4,16,power_law_1.2,0.08769919872283935
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,4,16,power_law_1.2,0.10298880338668823
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,4,16,power_law_1.2,0.11413120031356812
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,4,16,power_law_1.2,0.14925440549850463
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,4,16,power_law_1.2,0.18469120264053346
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,4,16,power_law_1.2,0.25265278816223147
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,4,16,power_law_1.2,0.30663039684295657
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,4,16,power_law_1.2,0.4666111946105957
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,4,16,power_law_1.2,0.5829247951507568
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,4,4,balanced,0.05388799806435903
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,4,4,balanced,0.05362133185068766
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,4,4,balanced,0.05412266651789347
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,4,4,balanced,0.057989334066708885
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,4,4,balanced,0.06629333396752675
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,4,4,balanced,0.08850666880607605
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,4,4,balanced,0.08966400225957234
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,4,4,balanced,0.0928106705347697
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,4,4,balanced,0.09134399890899658
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,4,4,balanced,0.09135466814041138
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,4,4,balanced,0.09443733096122742
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,4,4,balanced,0.09613333145777385
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,4,4,balanced,0.09849066535631816
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,4,4,balanced,0.0992693305015564
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,4,4,balanced,0.10532266894976298
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,4,4,balanced,0.10587732990582784
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,4,4,balanced,0.10945066809654236
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,4,4,balanced,0.12024533748626709
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,4,4,balanced,0.12710932890574136
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,4,4,balanced,0.14833600322405496
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,4,4,balanced,0.16690667470296225
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,4,4,balanced,0.20472532510757446
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,4,4,balanced,0.23920534054438272
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,4,4,balanced,0.3380746841430664
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,4,4,balanced,0.4195679823557536
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,4,4,balanced,0.603551983833313
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,4,4,balanced,0.7812746365865072
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,1,4,power_law_1.01,0.07957760095596314
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,1,4,power_law_1.01,0.11290240287780762
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,1,4,power_law_1.01,0.1288256049156189
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,1,4,power_law_1.01,0.15589120388031005
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,1,4,power_law_1.01,0.1834239959716797
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,1,4,power_law_1.01,0.20016639232635497
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,1,4,power_law_1.01,0.2625024080276489
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,1,4,power_law_1.01,0.25182080268859863
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,1,4,power_law_1.01,0.26566400527954104
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,1,4,power_law_1.01,0.2552639961242676
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,1,4,power_law_1.01,0.2681472063064575
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,1,4,power_law_1.01,0.2652031898498535
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,1,4,power_law_1.01,0.2791680097579956
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,1,4,power_law_1.01,0.27841920852661134
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,1,4,power_law_1.01,0.2791680097579956
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,1,4,power_law_1.01,0.2836607933044434
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,1,4,power_law_1.01,0.2951551914215088
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,1,4,power_law_1.01,0.33050880432128904
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,1,4,power_law_1.01,0.32849280834197997
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,1,4,power_law_1.01,0.3899967908859253
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,1,4,power_law_1.01,0.3938175916671753
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,1,4,power_law_1.01,0.49762558937072754
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,1,4,power_law_1.01,0.5054912090301513
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,1,4,power_law_1.01,0.6258175849914551
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,1,4,power_law_1.01,0.7368639945983887
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,1,4,power_law_1.01,1.0170432090759278
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,1,4,power_law_1.01,1.153273582458496
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,32,2,power_law_1.01,0.045465600490570066
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,32,2,power_law_1.01,0.048179200291633605
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,32,2,power_law_1.01,0.04951040148735046
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,32,2,power_law_1.01,0.05511680245399475
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,32,2,power_law_1.01,0.05923200249671936
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,32,2,power_law_1.01,0.06371200084686279
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,32,2,power_law_1.01,0.07176960110664368
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,32,2,power_law_1.01,0.07267839908599853
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,32,2,power_law_1.01,0.0737600028514862
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,32,2,power_law_1.01,0.07751039862632751
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,32,2,power_law_1.01,0.07747840285301208
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,32,2,power_law_1.01,0.0814848005771637
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,32,2,power_law_1.01,0.08468480110168457
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,32,2,power_law_1.01,0.08761600255966187
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,32,2,power_law_1.01,0.09355520009994507
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,32,2,power_law_1.01,0.09590399861335755
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,32,2,power_law_1.01,0.10347520112991333
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,32,2,power_law_1.01,0.12213759422302246
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,32,2,power_law_1.01,0.1313088059425354
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,32,2,power_law_1.01,0.15774719715118407
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,32,2,power_law_1.01,0.18229119777679442
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,32,2,power_law_1.01,0.23430399894714354
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,32,2,power_law_1.01,0.3075455904006958
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,32,2,power_law_1.01,0.40643839836120604
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,32,2,power_law_1.01,0.501964807510376
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,32,2,power_law_1.01,0.7043647766113281
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,32,2,power_law_1.01,0.8943552017211914
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,2,1,power_law_1.2,0.051667201519012454
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,2,1,power_law_1.2,0.06595199704170226
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,2,1,power_law_1.2,0.0814848005771637
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,2,1,power_law_1.2,0.10992640256881714
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,2,1,power_law_1.2,0.13930879831314086
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,2,1,power_law_1.2,0.1715648055076599
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,2,1,power_law_1.2,0.23280000686645508
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,2,1,power_law_1.2,0.24972159862518312
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,2,1,power_law_1.2,0.25320959091186523
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,2,1,power_law_1.2,0.26005120277404786
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,2,1,power_law_1.2,0.27089920043945315
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,2,1,power_law_1.2,0.2851072072982788
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,2,1,power_law_1.2,0.28480000495910646
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,2,1,power_law_1.2,0.29453439712524415
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,2,1,power_law_1.2,0.31473278999328613
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,2,1,power_law_1.2,0.3216576099395752
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,2,1,power_law_1.2,0.3458240032196045
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,2,1,power_law_1.2,0.39768319129943847
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,2,1,power_law_1.2,0.41464958190917967
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,2,1,power_law_1.2,0.493228816986084
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,2,1,power_law_1.2,0.5377855777740479
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,2,1,power_law_1.2,0.6793087959289551
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,2,1,power_law_1.2,0.8230079650878906
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,2,1,power_law_1.2,1.0934080123901366
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,2,1,power_law_1.2,1.353113555908203
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,2,1,power_law_1.2,1.8768512725830078
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,2,1,power_law_1.2,2.3739648818969727
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,1,4,power_law_1.01,0.060192000865936277
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,1,4,power_law_1.01,0.07438719868659974
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,1,4,power_law_1.01,0.08174719810485839
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,1,4,power_law_1.01,0.08826879858970642
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,1,4,power_law_1.01,0.1023360013961792
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,1,4,power_law_1.01,0.11141120195388794
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,1,4,power_law_1.01,0.13523839712142943
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,1,4,power_law_1.01,0.13010560274124144
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,1,4,power_law_1.01,0.12713600397109986
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,1,4,power_law_1.01,0.12823679447174072
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,1,4,power_law_1.01,0.13409279584884642
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,1,4,power_law_1.01,0.13663359880447387
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,1,4,power_law_1.01,0.13471360206604005
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,1,4,power_law_1.01,0.1400320053100586
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,1,4,power_law_1.01,0.1473536014556885
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,1,4,power_law_1.01,0.15218559503555298
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,1,4,power_law_1.01,0.15953919887542725
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,1,4,power_law_1.01,0.17693439722061158
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,1,4,power_law_1.01,0.19411840438842773
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,1,4,power_law_1.01,0.23038079738616943
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,1,4,power_law_1.01,0.2525183916091919
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,1,4,power_law_1.01,0.29960319995880125
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,1,4,power_law_1.01,0.35501439571380616
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,1,4,power_law_1.01,0.4592448234558105
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,1,4,power_law_1.01,0.5919424057006836
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,1,4,power_law_1.01,0.825932788848877
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,1,4,power_law_1.01,1.0810560226440429
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,1,8,balanced,0.05819199979305267
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,1,8,balanced,0.06052800019582113
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,1,8,balanced,0.06378666559855144
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,1,8,balanced,0.07572799921035767
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,1,8,balanced,0.09912000099817912
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,1,8,balanced,0.13272000352541605
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,1,8,balanced,0.1342026690642039
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,1,8,balanced,0.13648533821105957
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,1,8,balanced,0.1344160040219625
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,1,8,balanced,0.13730667034784952
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,1,8,balanced,0.13637866576512656
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,1,8,balanced,0.13712533315022787
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,1,8,balanced,0.13846932848294577
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,1,8,balanced,0.13872533043225607
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,1,8,balanced,0.14416533708572388
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,1,8,balanced,0.14523733655611673
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,1,8,balanced,0.14871467153231302
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,1,8,balanced,0.15324800213178
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,1,8,balanced,0.16057067116101584
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,1,8,balanced,0.17691200971603394
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,1,8,balanced,0.18953067064285278
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,1,8,balanced,0.22025599082310995
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,1,8,balanced,0.24632000923156738
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,1,8,balanced,0.33315734068552655
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,1,8,balanced,0.3940426508585612
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,1,8,balanced,0.5586453278859457
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,1,8,balanced,0.7277706464131674
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,4,8,balanced,0.041706666350364685
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,4,8,balanced,0.04533333579699198
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,4,8,balanced,0.04386133452256521
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,4,8,balanced,0.04762133459250132
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,4,8,balanced,0.04975999891757965
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,4,8,balanced,0.06508799890677135
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,4,8,balanced,0.06743999818960826
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,4,8,balanced,0.068122665087382
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,4,8,balanced,0.0681279997030894
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,4,8,balanced,0.06840533514817555
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,4,8,balanced,0.07083733379840851
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,4,8,balanced,0.07016533116499583
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,4,8,balanced,0.07287466526031494
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,4,8,balanced,0.07420266668001811
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,4,8,balanced,0.07838933169841766
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,4,8,balanced,0.08083199958006541
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,4,8,balanced,0.08450133601824443
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,4,8,balanced,0.09171733260154724
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,4,8,balanced,0.10019200046857198
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,4,8,balanced,0.1146506667137146
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,4,8,balanced,0.1276479959487915
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,4,8,balanced,0.16301332910855612
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,4,8,balanced,0.19164266188939413
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,4,8,balanced,0.2587040066719055
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,4,8,balanced,0.3102346658706665
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,4,8,balanced,0.4386933247248332
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,4,8,balanced,0.5617973407109579
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,2,2,power_law_1.2,0.020403200387954713
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,2,2,power_law_1.2,0.024928000569343568
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,2,2,power_law_1.2,0.02927359938621521
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,2,2,power_law_1.2,0.04199680089950562
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,2,2,power_law_1.2,0.05343359708786011
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,2,2,power_law_1.2,0.05779839754104614
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,2,2,power_law_1.2,0.07342720031738281
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,2,2,power_law_1.2,0.07662720084190369
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,2,2,power_law_1.2,0.07429760098457336
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,2,2,power_law_1.2,0.07927680015563965
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,2,2,power_law_1.2,0.08399999737739564
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,2,2,power_law_1.2,0.08655999898910523
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,2,2,power_law_1.2,0.08848639726638793
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,2,2,power_law_1.2,0.08915839791297912
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,2,2,power_law_1.2,0.09411200284957885
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,2,2,power_law_1.2,0.10194560289382934
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,2,2,power_law_1.2,0.11344000101089477
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,2,2,power_law_1.2,0.12550400495529174
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,2,2,power_law_1.2,0.12163200378417968
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,2,2,power_law_1.2,0.15289599895477296
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,2,2,power_law_1.2,0.15591039657592773
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,1,8,power_law_1.01,0.07828480005264282
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,2,2,power_law_1.2,0.19649920463562012
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,2,2,power_law_1.2,0.21753599643707275
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,1,8,power_law_1.01,0.10965759754180908
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,2,2,power_law_1.2,0.2569024085998535
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,1,8,power_law_1.01,0.12398719787597656
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,1,8,power_law_1.01,0.13654400110244752
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,1,8,power_law_1.01,0.13352960348129272
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,1,8,power_law_1.01,0.14292479753494264
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,1,8,power_law_1.01,0.16072319746017455
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,1,8,power_law_1.01,0.14725760221481324
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,1,8,power_law_1.01,0.15549440383911134
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,1,8,power_law_1.01,0.15507199764251708
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,2,2,power_law_1.2,0.3082880020141602
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,1,8,power_law_1.01,0.16416000127792357
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,2,2,power_law_1.2,0.41492481231689454
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,1,8,power_law_1.01,0.15717120170593263
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,2,2,power_law_1.2,0.5109119892120362
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,1,8,power_law_1.01,0.16476160287857056
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,1,8,power_law_1.01,0.168230402469635
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,1,8,power_law_1.01,0.17468160390853882
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,1,8,power_law_1.01,0.1718016028404236
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,1,8,power_law_1.01,0.17690880298614503
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,1,8,power_law_1.01,0.20894079208374022
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,1,8,power_law_1.01,0.19846400022506713
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,1,8,power_law_1.01,0.23287680149078369
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,1,8,power_law_1.01,0.24574079513549804
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,1,8,power_law_1.01,0.3037888050079346
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,1,8,power_law_1.01,0.3308799982070923
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,1,8,power_law_1.01,0.41016321182250975
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,1,8,power_law_1.01,0.4766655921936035
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,1,8,power_law_1.01,0.6834688186645508
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,1,8,power_law_1.01,0.8373120307922364
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,4,32,balanced,0.03988266736268997
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,4,32,balanced,0.04178666571776072
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,4,32,balanced,0.04016000032424927
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,4,32,balanced,0.04409599800904592
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,4,32,balanced,0.04566933214664459
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,4,32,balanced,0.04608533283074697
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,4,32,balanced,0.04811733464399973
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,4,32,balanced,0.050426666935284935
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,4,32,balanced,0.0499839981396993
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,4,32,balanced,0.050000001986821495
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,4,32,balanced,0.04981866478919983
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,4,32,balanced,0.051914667089780174
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,4,32,balanced,0.05198933184146881
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,4,32,balanced,0.05343999962011973
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,4,32,balanced,0.05835733314355215
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,4,32,balanced,0.05994133154551188
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,4,32,balanced,0.06470933556556702
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,4,32,balanced,0.06619733572006226
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,4,32,balanced,0.07457600037256877
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,4,32,balanced,0.08483200271924336
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,4,32,balanced,0.09267200032869975
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,4,32,balanced,0.11749333143234253
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,4,32,balanced,0.13618133465449014
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,4,32,balanced,0.18212266763051352
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,4,32,balanced,0.223962664604187
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,4,32,balanced,0.31031467517217
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,4,32,balanced,0.39233601093292236
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,1,16,balanced,0.02120000123977661
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,1,16,balanced,0.02179199953873952
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,1,16,balanced,0.023056000471115112
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,1,16,balanced,0.02316266546646754
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,1,16,balanced,0.027722666660944622
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,1,16,balanced,0.044624000787734985
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,1,16,balanced,0.04764799773693085
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,1,16,balanced,0.04598399996757507
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,1,16,balanced,0.04840533435344696
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,1,16,balanced,0.04971733192602793
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,1,16,balanced,0.05003733436266581
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,1,16,balanced,0.05341866612434387
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,1,16,balanced,0.05527999997138977
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,1,16,balanced,0.057914664347966514
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,1,16,balanced,0.06406400104363759
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,1,16,balanced,0.068122665087382
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,1,16,balanced,0.09859733780225118
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,1,16,balanced,0.1160533328851064
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,1,16,balanced,0.14251733819643655
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,1,16,balanced,0.17889066537221274
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,1,16,balanced,0.13345066706339517
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,1,16,balanced,0.15848533312479654
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,1,16,balanced,0.1869759956995646
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,1,16,balanced,0.23203200101852417
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,1,16,balanced,0.284496009349823
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,1,16,balanced,0.4356853167215983
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,1,16,balanced,0.5387893517812093
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,1,8,balanced,0.04993600149949392
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,1,8,balanced,0.04974933465321859
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,1,8,balanced,0.05171200136343638
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,1,8,balanced,0.05795733133951823
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,1,8,balanced,0.07260266443093617
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,1,8,balanced,0.09517866373062134
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,1,8,balanced,0.09646933277448018
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,1,8,balanced,0.09886399904886882
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,1,8,balanced,0.09709866841634114
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,1,8,balanced,0.09759466846783955
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,1,8,balanced,0.10125333070755005
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,1,8,balanced,0.10005333026250203
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,1,8,balanced,0.10099200407663982
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,1,8,balanced,0.10325866937637329
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,1,8,balanced,0.1090773344039917
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,1,8,balanced,0.11000532905260722
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,1,8,balanced,0.11353066563606262
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,1,8,balanced,0.12098133563995361
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,1,8,balanced,0.12813333670298258
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,1,8,balanced,0.14225066701571146
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,1,8,balanced,0.15928533673286438
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,1,8,balanced,0.18639467159907022
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,1,8,balanced,0.21502399444580078
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,1,8,balanced,0.2825439969698588
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,1,8,balanced,0.34163200855255127
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,1,8,balanced,0.47326934337615967
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,1,8,balanced,0.5884746710459391
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,4,4,power_law_1.2,0.051545602083206174
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,4,4,power_law_1.2,0.05392640233039856
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,4,4,power_law_1.2,0.0545087993144989
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,4,4,power_law_1.2,0.06369280219078063
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,4,4,power_law_1.2,0.06932479739189149
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,4,4,power_law_1.2,0.07374719977378845
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,4,4,power_law_1.2,0.08606079816818238
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,4,4,power_law_1.2,0.07878400087356567
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,4,4,power_law_1.2,0.07822080254554749
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,4,4,power_law_1.2,0.082259202003479
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,4,4,power_law_1.2,0.08693119883537292
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,4,4,power_law_1.2,0.08615679740905761
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,4,4,power_law_1.2,0.09059839844703674
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,4,4,power_law_1.2,0.09455360174179077
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,4,4,power_law_1.2,0.10214400291442871
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,4,4,power_law_1.2,0.10673279762268066
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,4,4,power_law_1.2,0.11121920347213746
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,4,4,power_law_1.2,0.12254719734191895
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,4,4,power_law_1.2,0.13690240383148194
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,4,4,power_law_1.2,0.1584768056869507
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,4,4,power_law_1.2,0.18922239542007446
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,4,4,power_law_1.2,0.23340160846710206
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,4,4,power_law_1.2,0.2862911939620972
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,4,4,power_law_1.2,0.4091775894165039
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,4,4,power_law_1.2,0.5271423816680908
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,4,4,power_law_1.2,0.7318016052246094
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,4,4,power_law_1.2,0.9388992309570312
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,4,4,power_law_1.01,0.060499197244644164
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,4,4,power_law_1.01,0.06723840236663818
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,4,4,power_law_1.01,0.07691519856452941
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,4,4,power_law_1.01,0.08696320056915283
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,4,4,power_law_1.01,0.09545599818229675
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,4,4,power_law_1.01,0.09354239702224731
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,4,4,power_law_1.01,0.10678399801254272
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,4,4,power_law_1.01,0.10679680109024048
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,4,4,power_law_1.01,0.10650240182876587
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,4,4,power_law_1.01,0.10677119493484497
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,4,4,power_law_1.01,0.11105920076370239
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,4,4,power_law_1.01,0.11291520595550537
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,4,4,power_law_1.01,0.11345280408859253
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,4,4,power_law_1.01,0.11920000314712524
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,4,4,power_law_1.01,0.121670401096344
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,4,4,power_law_1.01,0.12266240119934083
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,4,4,power_law_1.01,0.12312959432601929
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,4,4,power_law_1.01,0.13678079843521118
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,4,4,power_law_1.01,0.150764799118042
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,4,4,power_law_1.01,0.17825920581817628
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,4,4,power_law_1.01,0.1953984022140503
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,4,4,power_law_1.01,0.24677119255065919
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,4,4,power_law_1.01,0.3126591920852661
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,4,4,power_law_1.01,0.43169279098510743
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,4,4,power_law_1.01,0.5408959865570069
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,4,4,power_law_1.01,0.7612607955932618
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,4,4,power_law_1.01,1.0293248176574707
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,2,32,power_law_1.01,0.051820802688598636
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,2,32,power_law_1.01,0.04910080134868622
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,2,32,power_law_1.01,0.048742398619651794
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,2,32,power_law_1.01,0.04598399996757507
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,2,32,power_law_1.01,0.04684160053730011
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,2,32,power_law_1.01,0.046035200357437134
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,2,32,power_law_1.01,0.0469184011220932
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,2,32,power_law_1.01,0.046963199973106384
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,2,32,power_law_1.01,0.048076799511909483
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,2,32,power_law_1.01,0.04793600142002106
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,2,32,power_law_1.01,0.04890879988670349
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,2,32,power_law_1.01,0.05086719989776611
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,2,32,power_law_1.01,0.05178239941596985
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,2,32,power_law_1.01,0.055366402864456175
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,2,32,power_law_1.01,0.059494400024414064
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,2,32,power_law_1.01,0.06231679916381836
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,2,32,power_law_1.01,0.06262400150299072
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,2,32,power_law_1.01,0.07381759881973267
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,2,32,power_law_1.01,0.07706239819526672
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,2,32,power_law_1.01,0.092985600233078
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,2,32,power_law_1.01,0.11128959655761719
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,2,32,power_law_1.01,0.1365823984146118
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,2,32,power_law_1.01,0.16868480443954467
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,2,32,power_law_1.01,0.2169856071472168
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,2,32,power_law_1.01,0.27754878997802734
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,2,32,power_law_1.01,0.38830718994140623
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,2,32,power_law_1.01,0.49346561431884767
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,2,32,power_law_1.01,0.06488320231437683
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,2,32,power_law_1.01,0.05801600217819214
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,2,32,power_law_1.01,0.05672320127487183
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,2,32,power_law_1.01,0.057766401767730714
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,2,32,power_law_1.01,0.05061759948730469
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,2,32,power_law_1.01,0.050316798686981204
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,2,32,power_law_1.01,0.057132798433303836
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,2,32,power_law_1.01,0.05872640013694763
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,2,32,power_law_1.01,0.05493119955062866
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,2,32,power_law_1.01,0.06164479851722717
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,2,32,power_law_1.01,0.061388802528381345
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,2,32,power_law_1.01,0.061894398927688596
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,2,32,power_law_1.01,0.06060799956321716
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,2,32,power_law_1.01,0.06767359972000123
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,2,32,power_law_1.01,0.07310720086097718
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,2,32,power_law_1.01,0.07141759991645813
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,2,32,power_law_1.01,0.0756608009338379
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,2,32,power_law_1.01,0.08055679798126221
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,2,32,power_law_1.01,0.0843455970287323
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,2,32,power_law_1.01,0.09719039797782898
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,2,32,power_law_1.01,0.1091264009475708
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,2,32,power_law_1.01,0.1271232008934021
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,2,32,power_law_1.01,0.1496384024620056
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,2,32,power_law_1.01,0.19100159406661987
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,2,32,power_law_1.01,0.23337600231170655
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,2,32,power_law_1.01,0.3309056043624878
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,2,32,power_law_1.01,0.4017920017242432
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,2,16,power_law_1.2,0.052876800298690796
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,2,16,power_law_1.2,0.06836479902267456
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,2,16,power_law_1.2,0.06516479849815368
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,2,16,power_law_1.2,0.07014399766921997
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,2,16,power_law_1.2,0.07155200242996215
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,2,16,power_law_1.2,0.06459519863128663
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,2,16,power_law_1.2,0.06817920207977295
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,2,16,power_law_1.2,0.07109760046005249
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,2,16,power_law_1.2,0.07252479791641235
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,2,16,power_law_1.2,0.07351040244102477
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,2,16,power_law_1.2,0.07438079714775085
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,2,16,power_law_1.2,0.0748416006565094
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,2,16,power_law_1.2,0.07549440264701843
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,2,16,power_law_1.2,0.07724800109863281
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,2,16,power_law_1.2,0.08242560029029847
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,2,16,power_law_1.2,0.0834496021270752
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,2,16,power_law_1.2,0.08805760145187377
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,2,16,power_law_1.2,0.09511039853096008
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,2,16,power_law_1.2,0.10617599487304688
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,2,16,power_law_1.2,0.13392000198364257
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,2,16,power_law_1.2,0.14945919513702394
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,2,16,power_law_1.2,0.1958847999572754
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,2,16,power_law_1.2,0.23984639644622802
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,2,16,power_law_1.2,0.3115904092788696
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,2,16,power_law_1.2,0.44124798774719237
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,2,16,power_law_1.2,0.6877247810363769
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,2,16,power_law_1.2,0.9470975875854493
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,1,2,power_law_1.01,0.06279680132865906
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,1,2,power_law_1.01,0.08330879807472229
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,1,2,power_law_1.01,0.09576320052146911
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,1,2,power_law_1.01,0.12490880489349365
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,1,2,power_law_1.01,0.15687040090560914
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,1,2,power_law_1.01,0.19930239915847778
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,1,2,power_law_1.01,0.24557440280914306
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,1,2,power_law_1.01,0.25051519870758054
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,1,2,power_law_1.01,0.24887681007385254
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,1,2,power_law_1.01,0.2554559946060181
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,1,2,power_law_1.01,0.25895678997039795
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,1,2,power_law_1.01,0.26917119026184083
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,1,2,power_law_1.01,0.27764480113983153
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,1,2,power_law_1.01,0.2892863988876343
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,1,2,power_law_1.01,0.2918528079986572
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,1,2,power_law_1.01,0.3028287887573242
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,1,2,power_law_1.01,0.32602241039276125
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,1,2,power_law_1.01,0.3574079990386963
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,1,2,power_law_1.01,0.3548736095428467
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,1,2,power_law_1.01,0.42904958724975584
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,1,2,power_law_1.01,0.4427072048187256
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,1,2,power_law_1.01,0.56430082321167
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,1,2,power_law_1.01,0.6401023864746094
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,1,2,power_law_1.01,0.8276927947998047
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,1,2,power_law_1.01,0.9889087677001953
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,1,2,power_law_1.01,1.388588809967041
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,1,2,power_law_1.01,1.6981760025024415
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,4,32,power_law_1.2,0.062118399143219
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,4,32,power_law_1.2,0.05045120120048523
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,4,32,power_law_1.2,0.049055999517440795
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,4,32,power_law_1.2,0.04968959987163544
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,4,32,power_law_1.2,0.04935039877891541
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,4,32,power_law_1.2,0.047635200619697574
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,4,32,power_law_1.2,0.049593600630760196
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,4,32,power_law_1.2,0.05013759732246399
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,4,32,power_law_1.2,0.05125120282173157
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,4,32,power_law_1.2,0.05023999810218811
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,4,32,power_law_1.2,0.05185920000076294
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,4,32,power_law_1.2,0.05309439897537231
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,4,32,power_law_1.2,0.05367040038108826
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,4,32,power_law_1.2,0.054995197057724
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,4,32,power_law_1.2,0.060147202014923094
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,4,32,power_law_1.2,0.06033279895782471
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,4,32,power_law_1.2,0.06371200084686279
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,4,32,power_law_1.2,0.07011200189590454
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,4,32,power_law_1.2,0.07727360129356384
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,4,32,power_law_1.2,0.08821120262145996
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,4,32,power_law_1.2,0.10432640314102173
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,4,32,power_law_1.2,0.13322240114212036
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,4,32,power_law_1.2,0.15228159427642823
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,4,32,power_law_1.2,0.1982208013534546
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,4,32,power_law_1.2,0.25639679431915285
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,4,32,power_law_1.2,0.3883136034011841
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,4,32,power_law_1.2,0.48407678604125975
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,4,8,power_law_1.01,0.021631999313831328
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,4,8,power_law_1.01,0.021331200003623964
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,4,8,power_law_1.01,0.021030400693416596
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,4,8,power_law_1.01,0.025183999538421632
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,4,8,power_law_1.01,0.024403199553489685
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,4,8,power_law_1.01,0.0234047994017601
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,4,8,power_law_1.01,0.025183999538421632
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,4,8,power_law_1.01,0.025497600436210632
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,4,8,power_law_1.01,0.026982399821281432
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,4,8,power_law_1.01,0.02800000011920929
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,4,8,power_law_1.01,0.027929601073265076
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,4,8,power_law_1.01,0.02869119942188263
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,4,8,power_law_1.01,0.02990719974040985
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,4,8,power_law_1.01,0.029075199365615846
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,4,8,power_law_1.01,0.031231999397277832
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,4,8,power_law_1.01,0.03201920092105866
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,4,8,power_law_1.01,0.03550080060958862
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,4,8,power_law_1.01,0.04023039937019348
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,4,8,power_law_1.01,0.0630079984664917
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,4,8,power_law_1.01,0.07649919986724854
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,4,8,power_law_1.01,0.07125759720802308
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,4,8,power_law_1.01,0.076665598154068
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,4,8,power_law_1.01,0.09167360067367554
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,4,8,power_law_1.01,0.113811194896698
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,4,8,power_law_1.01,0.1456704020500183
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,4,8,power_law_1.01,0.18787839412689208
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,4,8,power_law_1.01,0.23758718967437745
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,16,4,power_law_1.2,0.04316799938678741
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,16,4,power_law_1.2,0.04599039852619171
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,16,4,power_law_1.2,0.044659200310707095
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,16,4,power_law_1.2,0.04808320105075836
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,16,4,power_law_1.2,0.051532799005508424
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,16,4,power_law_1.2,0.05384320020675659
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,16,4,power_law_1.2,0.06198400259017944
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,16,4,power_law_1.2,0.06479359865188598
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,16,4,power_law_1.2,0.06110720038414001
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,16,4,power_law_1.2,0.06159359812736511
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,16,4,power_law_1.2,0.06692479848861695
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,16,4,power_law_1.2,0.06837760210037232
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,16,4,power_law_1.2,0.0703935980796814
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,16,4,power_law_1.2,0.07430400252342224
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,16,4,power_law_1.2,0.07807360291481018
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,16,4,power_law_1.2,0.08324480056762695
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,16,4,power_law_1.2,0.08537600040435792
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,16,4,power_law_1.2,0.10039039850234985
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,16,4,power_law_1.2,0.10810879468917847
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,16,4,power_law_1.2,0.13087999820709229
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,16,4,power_law_1.2,0.14974080324172973
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,16,4,power_law_1.2,0.19064320325851442
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,16,4,power_law_1.2,0.22247679233551027
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,16,4,power_law_1.2,0.31071360111236573
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,16,4,power_law_1.2,0.40816640853881836
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,16,4,power_law_1.2,0.5712512016296387
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,16,4,power_law_1.2,0.7105855941772461
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,4,64,balanced,0.0418453315893809
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,4,64,balanced,0.04163199911514918
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,4,64,balanced,0.04200000067551931
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,4,64,balanced,0.045007998744646706
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,4,64,balanced,0.04595733185609182
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,4,64,balanced,0.04604266583919525
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,4,64,balanced,0.048010667165120445
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,4,64,balanced,0.05012266834576925
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,4,64,balanced,0.04980266590913137
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,4,64,balanced,0.05182399849096934
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,4,64,balanced,0.051674668987592064
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,4,64,balanced,0.05006400247414907
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,4,64,balanced,0.05213866631189982
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,4,64,balanced,0.05385066568851471
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,4,64,balanced,0.05815466741720835
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,4,64,balanced,0.0583840012550354
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,4,64,balanced,0.06221333146095276
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,4,64,balanced,0.06549866497516632
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,4,64,balanced,0.0726560006539027
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,4,64,balanced,0.08306666711966197
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,4,64,balanced,0.0888320008913676
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,4,64,balanced,0.11337066690127055
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,4,64,balanced,0.13019733627637228
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,4,64,balanced,0.16908266146977743
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,4,64,balanced,0.21002666155497232
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,4,64,balanced,0.29308799902598065
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,4,64,balanced,0.371237317721049
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,2,128,power_law_1.01,0.020121599733829498
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,2,128,power_law_1.01,0.018649600446224213
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,2,128,power_law_1.01,0.017843200266361235
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,2,128,power_law_1.01,0.017235200107097625
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,2,128,power_law_1.01,0.01905920058488846
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,2,128,power_law_1.01,0.021887999773025513
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,2,128,power_law_1.01,0.021247999370098115
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,2,128,power_law_1.01,0.029344001412391664
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,2,128,power_law_1.01,0.028940799832344054
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,2,128,power_law_1.01,0.02995840013027191
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,2,128,power_law_1.01,0.029631999135017396
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,2,128,power_law_1.01,0.029875200986862183
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,2,128,power_law_1.01,0.030220800638198854
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,2,128,power_law_1.01,0.02924799919128418
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,2,128,power_law_1.01,0.029708799719810487
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,2,128,power_law_1.01,0.031193599104881287
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,2,128,power_law_1.01,0.032576000690460204
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,2,128,power_law_1.01,0.033606401085853575
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,2,128,power_law_1.01,0.036601600050926206
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,2,128,power_law_1.01,0.04010879993438721
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,2,128,power_law_1.01,0.04467200040817261
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,2,128,power_law_1.01,0.05901439785957337
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,2,128,power_law_1.01,0.06807680130004883
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,2,128,power_law_1.01,0.09372159838676453
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,2,128,power_law_1.01,0.11240960359573364
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,2,128,power_law_1.01,0.16110719442367555
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,2,128,power_law_1.01,0.19582719802856446
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,1,64,power_law_1.01,0.09155200123786926
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,1,64,power_law_1.01,0.06516479849815368
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,1,64,power_law_1.01,0.05916159749031067
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,1,64,power_law_1.01,0.06094719767570496
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,1,64,power_law_1.01,0.05800319910049438
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,1,64,power_law_1.01,0.05873280167579651
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,1,64,power_law_1.01,0.05576320290565491
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,1,64,power_law_1.01,0.0599295973777771
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,1,64,power_law_1.01,0.06159999966621399
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,1,64,power_law_1.01,0.060627198219299315
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,1,64,power_law_1.01,0.06190720200538635
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,1,64,power_law_1.01,0.06622719764709473
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,1,64,power_law_1.01,0.06785920262336731
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,1,64,power_law_1.01,0.06498559713363647
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,1,64,power_law_1.01,0.07017599940299987
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,1,64,power_law_1.01,0.07247999906539918
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,1,64,power_law_1.01,0.07621759772300721
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,1,64,power_law_1.01,0.08595200181007386
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,1,64,power_law_1.01,0.08464639782905578
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,1,64,power_law_1.01,0.09768319725990296
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,1,64,power_law_1.01,0.10765440464019775
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,1,64,power_law_1.01,0.12746880054473878
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,1,64,power_law_1.01,0.15276800394058226
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,1,64,power_law_1.01,0.18922879695892333
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,1,64,power_law_1.01,0.23306879997253419
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,1,64,power_law_1.01,0.32478721141815187
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,1,64,power_law_1.01,0.40476160049438475
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,8,1,power_law_1.01,0.05149440169334411
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,8,1,power_law_1.01,0.05418879985809326
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,8,1,power_law_1.01,0.06567040085792542
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,8,1,power_law_1.01,0.08245760202407837
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,8,1,power_law_1.01,0.10175360441207885
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,8,1,power_law_1.01,0.11964160203933716
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,8,1,power_law_1.01,0.15077120065689087
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,8,1,power_law_1.01,0.15633280277252198
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,8,1,power_law_1.01,0.16254719495773315
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,8,1,power_law_1.01,0.1641983985900879
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,8,1,power_law_1.01,0.17361279726028442
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,8,1,power_law_1.01,0.1813248038291931
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,8,1,power_law_1.01,0.1827455997467041
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,8,1,power_law_1.01,0.20336639881134033
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,8,1,power_law_1.01,0.208787202835083
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,8,1,power_law_1.01,0.2164223909378052
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,8,1,power_law_1.01,0.2165440082550049
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,8,1,power_law_1.01,0.24499199390411378
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,8,1,power_law_1.01,0.26924800872802734
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,8,1,power_law_1.01,0.32768640518188474
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,8,1,power_law_1.01,0.36805760860443115
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,8,1,power_law_1.01,0.4716671943664551
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,8,1,power_law_1.01,0.5521599769592285
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,8,1,power_law_1.01,0.7383359909057617
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,8,1,power_law_1.01,0.9211520195007324
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,8,1,power_law_1.01,1.2868351936340332
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,8,1,power_law_1.01,1.6626623153686524
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,1,8,balanced,0.022298666338125866
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,1,8,balanced,0.0230880007147789
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,1,8,balanced,0.025770666698614757
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,1,8,balanced,0.02739733209212621
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,1,8,balanced,0.041696002086003624
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,1,8,balanced,0.0598826656738917
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,1,8,balanced,0.06448533137639363
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,1,8,balanced,0.0646666685740153
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,1,8,balanced,0.08276799817879994
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,1,8,balanced,0.07946133116881053
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,1,8,balanced,0.08078399797280629
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,1,8,balanced,0.1188106636206309
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,1,8,balanced,0.11061333616574605
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,1,8,balanced,0.11339733004570007
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,1,8,balanced,0.12222400307655334
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,1,8,balanced,0.12443199753761292
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,1,8,balanced,0.1365120013554891
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,1,8,balanced,0.15582399566968283
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,1,8,balanced,0.16214399536450705
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,1,8,balanced,0.1772480010986328
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,1,8,balanced,0.19167466958363852
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,1,8,balanced,0.21733866135279337
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,1,8,balanced,0.23971732457478842
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,1,8,balanced,0.27301865816116333
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,1,8,balanced,0.3148426612218221
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,1,8,balanced,0.5103520154953003
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,1,8,balanced,0.5873973369598389
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,2,8,power_law_1.01,0.05916799902915955
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,2,8,power_law_1.01,0.07507200241088867
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,2,8,power_law_1.01,0.08072959780693054
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,2,8,power_law_1.01,0.08867200016975403
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,2,8,power_law_1.01,0.09096959829330445
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,2,8,power_law_1.01,0.0906495988368988
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,2,8,power_law_1.01,0.0938368022441864
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,2,8,power_law_1.01,0.09526399970054626
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,2,8,power_law_1.01,0.09432960152626038
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,2,8,power_law_1.01,0.09588479995727539
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,2,8,power_law_1.01,0.09608319997787476
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,4,2,power_law_1.2,0.07573760151863099
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,2,8,power_law_1.01,0.09653760194778442
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,4,2,power_law_1.2,0.08176000118255615
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,2,8,power_law_1.01,0.10095360279083251
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,2,8,power_law_1.01,0.10017280578613282
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,4,2,power_law_1.2,0.09827200174331666
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,2,8,power_law_1.01,0.10341119766235352
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,4,2,power_law_1.2,0.11210880279541016
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,2,8,power_law_1.01,0.10619519948959351
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,4,2,power_law_1.2,0.11793919801712036
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,2,8,power_law_1.01,0.1099776029586792
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,4,2,power_law_1.2,0.1288383960723877
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,2,8,power_law_1.01,0.1220352053642273
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,2,8,power_law_1.01,0.1286911964416504
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,4,2,power_law_1.2,0.15302400588989257
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,2,8,power_law_1.01,0.14966399669647218
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,4,2,power_law_1.2,0.15436160564422607
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,2,8,power_law_1.01,0.18109439611434935
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,4,2,power_law_1.2,0.15412479639053345
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,2,8,power_law_1.01,0.21125760078430175
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,4,2,power_law_1.2,0.15886720418930053
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,2,8,power_law_1.01,0.25408639907836916
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,4,2,power_law_1.2,0.1671679973602295
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,2,8,power_law_1.01,0.36712958812713625
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,4,2,power_law_1.2,0.16625920534133912
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,2,8,power_law_1.01,0.4701695919036865
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,4,2,power_law_1.2,0.16711039543151857
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,2,8,power_law_1.01,0.633132791519165
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,2,8,power_law_1.01,0.8324224472045898
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,4,2,power_law_1.2,0.17461760044097902
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,4,2,power_law_1.2,0.17827199697494506
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,4,2,power_law_1.2,0.1767807960510254
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,4,2,power_law_1.2,0.19158400297164918
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,4,2,power_law_1.2,0.20638079643249513
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,4,2,power_law_1.2,0.22791039943695068
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,4,2,power_law_1.2,0.2691967964172363
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,4,2,power_law_1.2,0.3056256055831909
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,4,2,power_law_1.2,0.4016767978668213
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,4,2,power_law_1.2,0.4853055953979492
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,4,2,power_law_1.2,0.6782911777496338
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,4,2,power_law_1.2,0.852883243560791
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,4,2,power_law_1.2,1.2790656089782715
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,4,2,power_law_1.2,1.5843968391418457
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,4,4,power_law_1.2,0.046342399716377256
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,4,4,power_law_1.2,0.05043839812278748
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,4,4,power_law_1.2,0.05448319911956787
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,4,4,power_law_1.2,0.05987200140953064
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,4,4,power_law_1.2,0.06609280109405517
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,4,4,power_law_1.2,0.07181439995765686
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,4,4,power_law_1.2,0.07787520289421082
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,4,4,power_law_1.2,0.08288639783859253
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,4,4,power_law_1.2,0.08249599933624267
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,4,4,power_law_1.2,0.08440960049629212
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,4,4,power_law_1.2,0.08991360068321227
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,4,4,power_law_1.2,0.09089919924736023
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,4,4,power_law_1.2,0.0902783989906311
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,4,4,power_law_1.2,0.09455360174179077
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,4,4,power_law_1.2,0.1041152000427246
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,4,4,power_law_1.2,0.10661760568618775
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,4,4,power_law_1.2,0.11447679996490479
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,4,4,power_law_1.2,0.12679040431976318
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,4,4,power_law_1.2,0.1339903950691223
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,4,4,power_law_1.2,0.16664960384368896
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,4,4,power_law_1.2,0.18261760473251343
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,4,4,power_law_1.2,0.24332799911499023
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,4,4,power_law_1.2,0.29345920085906985
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,4,4,power_law_1.2,0.38828799724578855
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,4,4,power_law_1.2,0.5167232036590577
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,4,4,power_law_1.2,0.7676671981811524
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,4,4,power_law_1.2,0.9180288314819336
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,4,8,balanced,0.04513066510359446
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,4,8,balanced,0.04372799893220266
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,4,8,balanced,0.04342400034268697
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,4,8,balanced,0.04586666822433472
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,4,8,balanced,0.05963733295599619
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,4,8,balanced,0.0812906672557195
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,4,8,balanced,0.08330666522185008
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,4,8,balanced,0.07973333199818929
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,4,8,balanced,0.08061866462230682
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,4,8,balanced,0.07825600107510884
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,4,8,balanced,0.08102400104204814
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,4,8,balanced,0.08072533210118611
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,4,8,balanced,0.08107199768225352
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,4,8,balanced,0.0807360013326009
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,4,8,balanced,0.08859733740488689
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,4,8,balanced,0.08703999718030293
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,4,8,balanced,0.09288533528645833
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,4,8,balanced,0.10089600086212158
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,4,8,balanced,0.10384000341097514
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,4,8,balanced,0.11573867003122966
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,4,8,balanced,0.1285653313000997
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,4,8,balanced,0.15108799934387207
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,4,8,balanced,0.17917867501576742
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,4,8,balanced,0.22532800833384195
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,4,8,balanced,0.26873066027959186
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,4,8,balanced,0.3647306760152181
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,4,8,balanced,0.45422399044036865
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,4,16,power_law_1.01,0.05848960280418396
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,4,16,power_law_1.01,0.06650879979133606
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,4,16,power_law_1.01,0.05052800178527832
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,4,16,power_law_1.01,0.052934402227401735
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,4,16,power_law_1.01,0.052147197723388675
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,4,16,power_law_1.01,0.05234559774398804
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,4,16,power_law_1.01,0.053401601314544675
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,4,16,power_law_1.01,0.053350400924682614
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,4,16,power_law_1.01,0.05381119847297668
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,4,16,power_law_1.01,0.054553598165512085
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,4,16,power_law_1.01,0.05469440221786499
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,4,16,power_law_1.01,0.05630080103874206
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,4,16,power_law_1.01,0.05685120224952698
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,4,16,power_law_1.01,0.058764797449111936
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,4,16,power_law_1.01,0.06321280002593994
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,4,16,power_law_1.01,0.06439039707183838
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,4,16,power_law_1.01,0.06878079771995545
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,4,16,power_law_1.01,0.07633280158042907
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,4,16,power_law_1.01,0.0874559998512268
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,4,16,power_law_1.01,0.09971200227737427
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,4,16,power_law_1.01,0.10955519676208496
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,4,16,power_law_1.01,0.1376512050628662
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,4,16,power_law_1.01,0.17548799514770508
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,4,16,power_law_1.01,0.2388223886489868
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,4,16,power_law_1.01,0.27460479736328125
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,4,16,power_law_1.01,0.39825921058654784
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,4,16,power_law_1.01,0.5209983825683594
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,16,8,power_law_1.01,0.0455487996339798
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,16,8,power_law_1.01,0.04702079892158508
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,16,8,power_law_1.01,0.047705599665641786
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,16,8,power_law_1.01,0.04774399995803833
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,16,8,power_law_1.01,0.049702399969100954
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,16,8,power_law_1.01,0.04809600114822388
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,16,8,power_law_1.01,0.05129600167274475
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,16,8,power_law_1.01,0.052236801385879515
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,16,8,power_law_1.01,0.05180799961090088
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,16,8,power_law_1.01,0.05229439735412598
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,16,8,power_law_1.01,0.053472000360488894
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,16,8,power_law_1.01,0.05553920269012451
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,16,8,power_law_1.01,0.056383997201919556
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,16,8,power_law_1.01,0.060864001512527466
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,16,8,power_law_1.01,0.06491519808769226
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,16,8,power_law_1.01,0.06700159907341004
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,16,8,power_law_1.01,0.06775040030479432
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,16,8,power_law_1.01,0.07756159901618957
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,16,8,power_law_1.01,0.08542720079421998
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,16,8,power_law_1.01,0.1002303957939148
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,16,8,power_law_1.01,0.11449600458145141
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,16,8,power_law_1.01,0.16016639471054078
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,16,8,power_law_1.01,0.20385279655456542
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,16,8,power_law_1.01,0.27019519805908204
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,16,8,power_law_1.01,0.3287872076034546
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,16,8,power_law_1.01,0.4897280216217041
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,16,8,power_law_1.01,0.6489151954650879
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,2,64,power_law_1.2,0.04195199906826019
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,2,64,power_law_1.2,0.0416128009557724
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,2,64,power_law_1.2,0.041536000370979306
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,2,64,power_law_1.2,0.04476799964904785
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,2,64,power_law_1.2,0.04578559994697571
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,2,64,power_law_1.2,0.04602240025997162
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,2,64,power_law_1.2,0.0471231997013092
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,2,64,power_law_1.2,0.04799999892711639
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,2,64,power_law_1.2,0.05072000026702881
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,2,64,power_law_1.2,0.0502016007900238
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,2,64,power_law_1.2,0.05085440278053284
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,2,64,power_law_1.2,0.05203199982643127
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,2,64,power_law_1.2,0.05180799961090088
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,2,64,power_law_1.2,0.056627202033996585
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,2,64,power_law_1.2,0.06109439730644226
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,2,64,power_law_1.2,0.058873599767684935
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,2,64,power_law_1.2,0.06332799792289734
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,2,64,power_law_1.2,0.0714303970336914
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,2,64,power_law_1.2,0.07896959781646729
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,2,64,power_law_1.2,0.10042879581451417
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,2,64,power_law_1.2,0.11900800466537476
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,2,64,power_law_1.2,0.15792640447616577
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,2,64,power_law_1.2,0.19971840381622313
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,2,64,power_law_1.2,0.27025279998779295
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,2,64,power_law_1.2,0.34647679328918457
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,2,64,power_law_1.2,0.4956672191619873
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,2,64,power_law_1.2,0.7808256149291992
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,4,1,power_law_1.2,0.018566399812698364
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,4,1,power_law_1.2,0.020121599733829498
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,4,1,power_law_1.2,0.026624000072479247
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,4,1,power_law_1.2,0.033881598711013795
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,4,1,power_law_1.2,0.0458624005317688
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,4,1,power_law_1.2,0.05601279735565186
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,4,1,power_law_1.2,0.06971520185470581
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,4,1,power_law_1.2,0.07521920204162598
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,4,1,power_law_1.2,0.07614079713821412
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,4,1,power_law_1.2,0.07947520017623902
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,4,1,power_law_1.2,0.08233600258827209
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,4,1,power_law_1.2,0.08172159790992736
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,4,1,power_law_1.2,0.08741120100021363
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,4,1,power_law_1.2,0.0902400016784668
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,4,1,power_law_1.2,0.09501439929008484
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,4,1,power_law_1.2,0.10042879581451417
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,4,1,power_law_1.2,0.1122048020362854
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,4,1,power_law_1.2,0.11904000043869019
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,4,1,power_law_1.2,0.13214720487594606
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,4,1,power_law_1.2,0.1583359956741333
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,4,1,power_law_1.2,0.17874560356140137
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,4,1,power_law_1.2,0.23057279586791993
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,4,1,power_law_1.2,0.2081279993057251
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,4,1,power_law_1.2,0.267903995513916
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,4,1,power_law_1.2,0.33593599796295165
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,4,1,power_law_1.2,0.46004481315612794
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,4,1,power_law_1.2,0.5852799892425538
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,2,128,power_law_1.2,0.020345599949359895
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,2,128,power_law_1.2,0.01860480010509491
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,2,128,power_law_1.2,0.01774719953536987
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,2,128,power_law_1.2,0.017817600071430205
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,2,128,power_law_1.2,0.020428800582885744
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,2,128,power_law_1.2,0.020473599433898926
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,2,128,power_law_1.2,0.02114560008049011
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,2,128,power_law_1.2,0.02968960106372833
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,2,128,power_law_1.2,0.028896000981330872
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,2,128,power_law_1.2,0.028960001468658448
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,2,128,power_law_1.2,0.029407998919486998
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,2,128,power_law_1.2,0.029849600791931153
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,2,128,power_law_1.2,0.02958720028400421
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,2,128,power_law_1.2,0.029625600576400755
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,2,128,power_law_1.2,0.030585598945617676
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,2,128,power_law_1.2,0.031763198971748355
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,2,128,power_law_1.2,0.03304319977760315
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,2,128,power_law_1.2,0.034585601091384886
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,2,128,power_law_1.2,0.03580160140991211
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,2,128,power_law_1.2,0.04234879910945892
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,2,128,power_law_1.2,0.04514560103416443
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,2,128,power_law_1.2,0.060031998157501223
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,4,1,balanced,0.05030933519204458
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,4,1,balanced,0.053898667295773826
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,2,128,power_law_1.2,0.07176960110664368
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,2,128,power_law_1.2,0.09616000056266785
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,2,128,power_law_1.2,0.11969280242919922
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,2,128,power_law_1.2,0.1659327983856201
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,2,128,power_law_1.2,0.21824638843536376
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,4,1,balanced,0.06401599943637848
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,4,1,balanced,0.08773866295814514
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,4,1,balanced,0.12800533572832742
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,4,1,balanced,0.1951786677042643
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,4,1,balanced,0.1991573373476664
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,4,1,balanced,0.19898132483164468
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,4,1,balanced,0.19941866397857666
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,4,1,balanced,0.20083733399709067
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,4,1,balanced,0.2049600084622701
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,4,1,balanced,0.20842132965723673
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,4,1,balanced,0.21227733294169107
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,4,1,balanced,0.21810666720072427
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,4,1,balanced,0.22330133120218912
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,4,1,balanced,0.2309066653251648
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,4,1,balanced,0.245253324508667
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,4,1,balanced,0.2752853234608968
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,4,1,balanced,0.2966933250427246
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,4,1,balanced,0.35121599833170575
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,4,1,balanced,0.40057599544525146
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,4,1,balanced,0.5321333408355713
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,4,1,balanced,0.6148053407669067
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,4,1,balanced,0.8656160036722819
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,4,1,balanced,1.0653973420461018
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,4,1,balanced,1.531215985616048
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,4,1,balanced,1.9700427055358887
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,1,256,power_law_1.01,0.039340800046920775
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,1,256,power_law_1.01,0.03978239893913269
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,1,256,power_law_1.01,0.039955198764801025
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,1,256,power_law_1.01,0.04264959990978241
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,1,256,power_law_1.01,0.04426240026950836
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,1,256,power_law_1.01,0.0452672004699707
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,1,256,power_law_1.01,0.04648320078849792
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,1,256,power_law_1.01,0.04733439981937408
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,1,256,power_law_1.01,0.04896639883518219
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,1,256,power_law_1.01,0.04906879961490631
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,1,256,power_law_1.01,0.05256959795951843
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,1,256,power_law_1.01,0.05457280278205871
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,1,256,power_law_1.01,0.05000960230827332
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,1,256,power_law_1.01,0.05495039820671081
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,1,256,power_law_1.01,0.059673601388931276
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,1,256,power_law_1.01,0.06108800172805786
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,1,256,power_law_1.01,0.06370559930801392
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,1,256,power_law_1.01,0.07219840288162231
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,1,256,power_law_1.01,0.07839999794960022
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,1,256,power_law_1.01,0.0930239975452423
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,1,256,power_law_1.01,0.10755840539932252
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,1,256,power_law_1.01,0.14184319972991943
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,1,256,power_law_1.01,0.17709439992904663
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,1,256,power_law_1.01,0.25201280117034913
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,1,256,power_law_1.01,0.308409595489502
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,1,256,power_law_1.01,0.4531263828277588
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,1,256,power_law_1.01,0.5972224235534668
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,2,8,balanced,0.04330666859944662
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,2,8,balanced,0.04543999830881754
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,2,8,balanced,0.04371733466784159
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,2,8,balanced,0.04772266745567322
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,2,8,balanced,0.06026133398214976
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,2,8,balanced,0.08135466774304707
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,2,8,balanced,0.08381332953770955
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,2,8,balanced,0.08340799808502197
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,2,8,balanced,0.08425066868464152
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,2,8,balanced,0.08443733056386311
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,2,8,balanced,0.0865119993686676
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,2,8,balanced,0.084906667470932
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,2,8,balanced,0.08866666754086812
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,2,8,balanced,0.08891200025876363
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,2,8,balanced,0.09436800082524617
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,2,8,balanced,0.09697600205739339
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,2,8,balanced,0.101200004418691
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,2,8,balanced,0.108815997838974
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,2,8,balanced,0.11599999666213989
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,2,8,balanced,0.13127467036247253
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,2,8,balanced,0.1455519994099935
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,2,8,balanced,0.17833600441614786
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,2,8,balanced,0.20331199963887533
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,2,8,balanced,0.27536000808080036
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,2,8,balanced,0.33165866136550903
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,2,8,balanced,0.47440000375111896
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,2,8,balanced,0.5946986675262451
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,4,32,balanced,0.04349866509437561
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,4,32,balanced,0.04342933495839437
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,4,32,balanced,0.04402133325735728
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,4,32,balanced,0.04190400242805481
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,4,32,balanced,0.04566933214664459
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,4,32,balanced,0.049509331583976746
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,4,32,balanced,0.05009600023428599
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,4,32,balanced,0.050000001986821495
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,4,32,balanced,0.050399998823801674
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,4,32,balanced,0.05208000044027964
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,4,32,balanced,0.047968000173568726
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,4,32,balanced,0.04961599906285604
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,4,32,balanced,0.04795200129350027
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,4,32,balanced,0.05383466680844625
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,4,32,balanced,0.056159997979799904
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,4,32,balanced,0.058143998185793556
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,4,32,balanced,0.05622933308283488
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,4,32,balanced,0.06038933495680491
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,4,32,balanced,0.06393066545327504
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,4,32,balanced,0.06992533306280772
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,4,32,balanced,0.08041599889596303
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,4,32,balanced,0.09723200400670369
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,4,32,balanced,0.10869333148002625
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,4,32,balanced,0.14829867084821066
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,4,32,balanced,0.17735999822616577
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,4,32,balanced,0.2325706680615743
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,4,32,balanced,0.3001226584116618
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,2,64,power_law_1.01,0.05931519865989685
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,2,64,power_law_1.01,0.04663040041923523
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,2,64,power_law_1.01,0.04583680033683777
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,2,64,power_law_1.01,0.04657920002937317
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,2,64,power_law_1.01,0.04648320078849792
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,2,64,power_law_1.01,0.046777600049972536
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,2,64,power_law_1.01,0.047244799137115476
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,2,64,power_law_1.01,0.04807040095329285
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,2,64,power_law_1.01,0.04847359955310822
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,2,64,power_law_1.01,0.0483711987733841
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,2,64,power_law_1.01,0.049030399322509764
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,2,64,power_law_1.01,0.05060480237007141
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,2,64,power_law_1.01,0.05084800124168396
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,2,64,power_law_1.01,0.051846402883529666
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,2,64,power_law_1.01,0.05642240047454834
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,2,64,power_law_1.01,0.057606399059295654
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,2,64,power_law_1.01,0.05979520082473755
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,2,64,power_law_1.01,0.06643840074539184
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,2,64,power_law_1.01,0.07164160013198853
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,2,64,power_law_1.01,0.08240640163421631
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,2,64,power_law_1.01,0.09190400242805481
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,2,64,power_law_1.01,0.11148159503936768
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,2,64,power_law_1.01,0.13169280290603638
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,2,64,power_law_1.01,0.17457280158996583
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,2,64,power_law_1.01,0.21485440731048583
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,2,64,power_law_1.01,0.29772799015045165
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,2,64,power_law_1.01,0.40459518432617186
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,8,2,power_law_1.2,0.044563201069831845
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,8,2,power_law_1.2,0.04780159890651703
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,8,2,power_law_1.2,0.05012480020523071
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,8,2,power_law_1.2,0.05509120225906372
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,8,2,power_law_1.2,0.06424319744110107
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,8,2,power_law_1.2,0.0731328010559082
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,8,2,power_law_1.2,0.08349440097808838
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,8,2,power_law_1.2,0.08625280261039733
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,8,2,power_law_1.2,0.08851199746131896
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,8,2,power_law_1.2,0.08999680280685425
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,8,2,power_law_1.2,0.09327359795570374
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,8,2,power_law_1.2,0.09896960258483886
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,8,2,power_law_1.2,0.10015360116958619
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,8,2,power_law_1.2,0.10604159832000733
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,8,2,power_law_1.2,0.11479040384292602
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,8,2,power_law_1.2,0.1220736026763916
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,8,2,power_law_1.2,0.12485120296478272
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,8,2,power_law_1.2,0.14395519495010375
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,8,2,power_law_1.2,0.16543999910354615
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,8,2,power_law_1.2,0.20027520656585693
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,8,2,power_law_1.2,0.22940800189971924
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,8,2,power_law_1.2,0.2987584114074707
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,8,2,power_law_1.2,0.36808960437774657
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,8,2,power_law_1.2,0.5258944034576416
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,8,2,power_law_1.2,0.6646656036376953
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,8,2,power_law_1.2,0.9601408004760742
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,8,2,power_law_1.2,1.2906175613403321
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,1,32,power_law_1.01,0.07731199860572815
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,1,32,power_law_1.01,0.0745024025440216
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,1,32,power_law_1.01,0.07587839961051941
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,1,32,power_law_1.01,0.07614719867706299
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,1,32,power_law_1.01,0.07560960054397584
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,1,32,power_law_1.01,0.07493119835853576
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,1,32,power_law_1.01,0.07514240145683289
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,1,32,power_law_1.01,0.07288320064544677
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,1,32,power_law_1.01,0.07117440104484558
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,1,32,power_law_1.01,0.07607679963111877
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,1,32,power_law_1.01,0.07736319899559022
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,1,32,power_law_1.01,0.08073599934577942
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,1,32,power_law_1.01,0.08064640164375306
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,1,32,power_law_1.01,0.08588799834251404
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,1,32,power_law_1.01,0.08994560241699219
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,1,32,power_law_1.01,0.09496960043907166
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,1,32,power_law_1.01,0.09327359795570374
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,1,32,power_law_1.01,0.10151679515838623
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,1,32,power_law_1.01,0.1054527997970581
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,1,32,power_law_1.01,0.11794559955596924
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,1,32,power_law_1.01,0.1294592022895813
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,1,32,power_law_1.01,0.15991679430007935
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,1,32,power_law_1.01,0.18360960483551025
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,1,32,power_law_1.01,0.21527678966522218
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,1,32,power_law_1.01,0.2690176010131836
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,1,32,power_law_1.01,0.3925823926925659
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,1,32,power_law_1.01,0.48631038665771487
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,1,8,power_law_1.2,0.07362560033798218
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,1,8,power_law_1.2,0.10401279926300049
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,1,8,power_law_1.2,0.10776319503784179
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,1,8,power_law_1.2,0.12108800411224366
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,1,8,power_law_1.2,0.14810880422592163
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,1,8,power_law_1.2,0.14053759574890137
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,1,8,power_law_1.2,0.16391680240631104
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,1,8,power_law_1.2,0.15409280061721803
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,1,8,power_law_1.2,0.15329920053482055
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,1,8,power_law_1.2,0.15671039819717408
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,1,8,power_law_1.2,0.15954560041427612
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,1,8,power_law_1.2,0.15834879875183105
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,1,8,power_law_1.2,0.1627392053604126
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,1,8,power_law_1.2,0.15882240533828734
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,1,8,power_law_1.2,0.16979199647903442
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,1,8,power_law_1.2,0.17612799406051635
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,1,8,power_law_1.2,0.1860159993171692
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,1,8,power_law_1.2,0.21473278999328613
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,1,8,power_law_1.2,0.21848320960998535
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,1,8,power_law_1.2,0.2509183883666992
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,1,8,power_law_1.2,0.26935040950775146
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,1,8,power_law_1.2,0.3552767992019653
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,1,8,power_law_1.2,0.33043200969696046
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,1,8,power_law_1.2,0.43223037719726565
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,1,8,power_law_1.2,0.5105088233947754
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,1,8,power_law_1.2,0.7140607833862305
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,1,8,power_law_1.2,0.9090239524841308
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,8,16,power_law_1.01,0.046086400747299194
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,8,16,power_law_1.01,0.05825279951095581
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,8,16,power_law_1.01,0.04952960014343262
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,8,16,power_law_1.01,0.046387198567390445
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,8,16,power_law_1.01,0.04741120040416717
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,8,16,power_law_1.01,0.0455808013677597
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,8,16,power_law_1.01,0.04783360064029694
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,8,16,power_law_1.01,0.04917759895324707
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,8,16,power_law_1.01,0.048537600040435794
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,8,16,power_law_1.01,0.04926080107688904
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,8,16,power_law_1.01,0.04961279928684235
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,8,16,power_law_1.01,0.052095997333526614
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,8,16,power_law_1.01,0.053881597518920896
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,8,16,power_law_1.01,0.054079997539520266
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,8,16,power_law_1.01,0.05994240045547485
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,8,16,power_law_1.01,0.06040319800376892
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,8,16,power_law_1.01,0.06368640065193176
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,8,16,power_law_1.01,0.0681984007358551
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,8,16,power_law_1.01,0.07904000282287597
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,8,16,power_law_1.01,0.09153280258178711
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,8,16,power_law_1.01,0.1004032015800476
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,8,16,power_law_1.01,0.13600640296936034
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,8,16,power_law_1.01,0.1741312026977539
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,8,16,power_law_1.01,0.2349695920944214
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,8,16,power_law_1.01,0.2861119985580444
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,8,16,power_law_1.01,0.401964807510376
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,1,256,power_law_1.2,0.039052799344062805
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,8,16,power_law_1.01,0.5191808223724366
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,1,256,power_law_1.2,0.039904001355171206
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,1,256,power_law_1.2,0.03949440121650696
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,1,256,power_law_1.2,0.04387199878692627
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,1,256,power_law_1.2,0.044537600874900815
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,1,256,power_law_1.2,0.04567039906978607
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,1,256,power_law_1.2,0.047091200947761536
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,1,256,power_law_1.2,0.04736000001430511
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,1,256,power_law_1.2,0.049107199907302855
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,1,256,power_law_1.2,0.04953599870204926
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,1,256,power_law_1.2,0.05306239724159241
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,1,256,power_law_1.2,0.05495679974555969
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,1,256,power_law_1.2,0.05030400156974792
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,1,256,power_law_1.2,0.05608320236206055
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,1,256,power_law_1.2,0.06135680079460144
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,1,256,power_law_1.2,0.06216959953308106
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,1,256,power_law_1.2,0.06709120273590088
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,1,256,power_law_1.2,0.07577599883079529
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,1,256,power_law_1.2,0.08436480164527893
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,1,256,power_law_1.2,0.10304640531539917
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,1,256,power_law_1.2,0.1283136010169983
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,1,256,power_law_1.2,0.17479679584503174
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,1,256,power_law_1.2,0.22812159061431886
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,1,256,power_law_1.2,0.31356799602508545
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,1,256,power_law_1.2,0.41364479064941406
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,1,256,power_law_1.2,0.7586175918579101
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,1,256,power_law_1.2,1.115891170501709
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,2,1,power_law_1.01,0.12676479816436767
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,2,1,power_law_1.01,0.14064639806747437
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,2,1,power_law_1.01,0.170579195022583
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,2,1,power_law_1.01,0.20307838916778564
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,2,1,power_law_1.01,0.23876481056213378
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,2,1,power_law_1.01,0.27200000286102294
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,2,1,power_law_1.01,0.33416318893432617
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,2,1,power_law_1.01,0.3493119955062866
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,2,1,power_law_1.01,0.36087679862976074
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,2,1,power_law_1.01,0.36294400691986084
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,2,1,power_law_1.01,0.3759104013442993
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,2,1,power_law_1.01,0.3874687910079956
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,2,1,power_law_1.01,0.3904383897781372
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,2,1,power_law_1.01,0.4098559856414795
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,2,1,power_law_1.01,0.4046463966369629
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,2,1,power_law_1.01,0.41320319175720216
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,2,1,power_law_1.01,0.43240962028503416
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,2,1,power_law_1.01,0.46393599510192873
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,2,1,power_law_1.01,0.49704318046569823
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,2,1,power_law_1.01,0.571065616607666
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,2,1,power_law_1.01,0.6552576065063477
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,2,1,power_law_1.01,0.7712768077850342
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,2,1,power_law_1.01,0.9201279640197754
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,2,1,power_law_1.01,1.230355167388916
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,2,1,power_law_1.01,1.5482943534851075
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,2,1,power_law_1.01,2.1674367904663088
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,2,1,power_law_1.01,2.8212223052978516
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,1,1,balanced,0.18412800629933676
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,1,1,balanced,0.19369065761566162
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,1,1,balanced,0.21385065714518228
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,1,1,balanced,0.2770719925562541
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,1,1,balanced,0.42122666041056317
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,1,1,balanced,0.7034506797790527
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,1,1,balanced,0.7077333132425944
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,1,1,balanced,0.711178700129191
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,1,1,balanced,0.7123786608378092
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,1,1,balanced,0.7142240206400553
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,1,1,balanced,0.7210986614227295
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,1,1,balanced,0.7286026477813721
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,1,1,balanced,0.7326133251190186
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,1,1,balanced,0.7372053464253744
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,1,1,balanced,0.7440106868743896
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,1,1,balanced,0.7518400351206461
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,1,1,balanced,0.7559466361999512
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,1,1,balanced,0.7896052996317545
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,1,1,balanced,0.813477357228597
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,1,1,balanced,0.8729226589202881
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,1,1,balanced,0.9378613630930582
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,1,1,balanced,1.0327253341674805
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,1,1,balanced,1.1444053649902344
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,1,1,balanced,1.6120959917704265
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,1,1,balanced,1.8540746370951335
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,1,1,balanced,2.603450616200765
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,1,1,balanced,3.444053332010905
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,16,4,balanced,0.04151466737190882
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,16,4,balanced,0.04170133173465729
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,16,4,balanced,0.04375466704368591
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,16,4,balanced,0.048021331429481506
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,16,4,balanced,0.052186667919158936
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,16,4,balanced,0.07251733541488647
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,16,4,balanced,0.07248533268769582
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,16,4,balanced,0.07379200061162312
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,16,4,balanced,0.07503466804822286
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,16,4,balanced,0.07341866691907246
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,16,4,balanced,0.07482133309046428
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,16,4,balanced,0.07643733421961467
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,16,4,balanced,0.07673599819342296
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,16,4,balanced,0.07655466596285503
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,16,4,balanced,0.08362666765848796
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,16,4,balanced,0.08269866804281871
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,16,4,balanced,0.08668800195058186
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,16,4,balanced,0.09557333588600159
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,16,4,balanced,0.0988213320573171
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,16,4,balanced,0.11262933413187663
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,16,4,balanced,0.12660266955693564
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,16,4,balanced,0.16249066591262817
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,16,4,balanced,0.19081066052118936
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,16,4,balanced,0.26340800523757935
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,16,4,balanced,0.35944533348083496
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,16,4,balanced,0.5141973495483398
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,16,4,balanced,0.5816053152084351
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,8,8,balanced,0.01960533360640208
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,8,8,balanced,0.019359999646743137
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,8,8,balanced,0.020992000897725422
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,8,8,balanced,0.021317332983016968
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,8,8,balanced,0.021253332495689392
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,8,8,balanced,0.025413334369659424
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,8,8,balanced,0.027589333554108936
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,8,8,balanced,0.027429332335789997
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,8,8,balanced,0.02770666778087616
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,8,8,balanced,0.03158933420976003
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,8,8,balanced,0.02959999938805898
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,8,8,balanced,0.029466666281223297
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,8,8,balanced,0.03349333256483078
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,8,8,balanced,0.03384533276160558
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,8,8,balanced,0.037578667203585304
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,8,8,balanced,0.03941866755485535
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,8,8,balanced,0.047354668378829956
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,8,8,balanced,0.04828799764315287
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,8,8,balanced,0.06326933205127716
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,8,8,balanced,0.07906133433183034
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,8,8,balanced,0.08249600231647491
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,8,8,balanced,0.09284266829490662
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,8,8,balanced,0.10708799958229065
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,8,8,balanced,0.13343466321627298
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,8,8,balanced,0.15754133462905884
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,8,8,balanced,0.23258666197458902
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,8,8,balanced,0.28197334210077923
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,1,1,power_law_1.01,0.1572543978691101
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,1,1,power_law_1.01,0.1797824025154114
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,1,1,power_law_1.01,0.20819199085235596
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,1,1,power_law_1.01,0.27694718837738036
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,1,1,power_law_1.01,0.3488703966140747
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,1,1,power_law_1.01,0.42731518745422364
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,1,1,power_law_1.01,0.5501247882843018
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,1,1,power_law_1.01,0.5772543907165527
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,1,1,power_law_1.01,0.5893824100494385
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,1,1,power_law_1.01,0.6051968097686767
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,1,1,power_law_1.01,0.6181759834289551
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,1,1,power_law_1.01,0.6237696170806885
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,1,1,power_law_1.01,0.6500288009643554
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,1,1,power_law_1.01,0.6677440166473388
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,1,1,power_law_1.01,0.6738880157470704
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,1,1,power_law_1.01,0.6876160144805908
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,1,1,power_law_1.01,0.704857587814331
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,1,1,power_law_1.01,0.7437183856964111
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,1,1,power_law_1.01,0.7743167877197266
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,1,1,power_law_1.01,0.861292839050293
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,1,1,power_law_1.01,0.9501184463500977
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,1,1,power_law_1.01,1.092204761505127
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,1,1,power_law_1.01,1.2878911972045899
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,1,1,power_law_1.01,1.680384063720703
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,1,1,power_law_1.01,2.0660608291625975
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,1,1,power_law_1.01,2.8629632949829102
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,1,1,power_law_1.01,3.648537445068359
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,4,8,power_law_1.2,0.021644799411296843
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,4,8,power_law_1.2,0.021324799954891206
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,4,8,power_law_1.2,0.021030400693416596
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,4,8,power_law_1.2,0.02383359968662262
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,4,8,power_law_1.2,0.023609599471092223
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,4,8,power_law_1.2,0.02248319983482361
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,4,8,power_law_1.2,0.025337600708007814
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,4,8,power_law_1.2,0.025337600708007814
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,4,8,power_law_1.2,0.026771199703216553
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,4,8,power_law_1.2,0.028115200996398925
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,4,8,power_law_1.2,0.028230398893356323
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,4,8,power_law_1.2,0.029388800263404846
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,4,8,power_law_1.2,0.0308351993560791
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,4,8,power_law_1.2,0.029516801238059998
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,4,8,power_law_1.2,0.0322816014289856
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,4,8,power_law_1.2,0.033913600444793704
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,4,8,power_law_1.2,0.03521920144557953
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,4,8,power_law_1.2,0.041331198811531064
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,4,8,power_law_1.2,0.06401919722557067
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,4,8,power_law_1.2,0.07847679853439331
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,4,8,power_law_1.2,0.06982399821281433
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,4,8,power_law_1.2,0.0850495994091034
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,4,8,power_law_1.2,0.09476479887962341
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,4,8,power_law_1.2,0.11815680265426635
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,4,8,power_law_1.2,0.15137920379638672
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,4,8,power_law_1.2,0.19275519847869874
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,4,8,power_law_1.2,0.26587519645690916
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,1,1,balanced,0.025605333348115284
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,1,1,balanced,0.029530666768550873
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,1,1,balanced,0.04555733501911163
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,1,1,balanced,0.06101333101590475
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,1,1,balanced,0.09520533680915833
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,1,1,balanced,0.16637866695721945
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,1,1,balanced,0.17062399784723917
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,1,1,balanced,0.1767253279685974
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,1,1,balanced,0.1814026633898417
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,1,1,balanced,0.18668800592422485
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,1,1,balanced,0.1860213279724121
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,1,1,balanced,0.19171732664108276
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,1,1,balanced,0.19828800360361734
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,1,1,balanced,0.2088586688041687
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,1,1,balanced,0.2218666672706604
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,1,1,balanced,0.2328480084737142
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,1,1,balanced,0.23470399777094522
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,1,1,balanced,0.28013867139816284
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,1,1,balanced,0.2711893320083618
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,1,1,balanced,0.36952535311381024
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,1,1,balanced,0.3394346634546916
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,1,1,balanced,0.44485334555308026
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,1,1,balanced,0.410533348719279
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,1,1,balanced,0.5550933281580607
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,1,1,balanced,0.7060320377349854
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,1,1,balanced,1.021071990331014
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,1,1,balanced,1.3397332827250164
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,1,4,power_law_1.2,0.02499839961528778
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,1,4,power_law_1.2,0.03710080087184906
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,1,4,power_law_1.2,0.042931199073791504
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,1,4,power_law_1.2,0.053324800729751584
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,1,4,power_law_1.2,0.0581055998802185
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,1,4,power_law_1.2,0.06291840076446534
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,1,4,power_law_1.2,0.08208000063896179
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,1,4,power_law_1.2,0.07468799948692321
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,1,4,power_law_1.2,0.08262400031089782
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,1,4,power_law_1.2,0.0830784022808075
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,1,4,power_law_1.2,0.08524799942970276
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,1,4,power_law_1.2,0.09299839735031128
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,1,4,power_law_1.2,0.09009280204772949
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,1,4,power_law_1.2,0.09101439714431762
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,1,4,power_law_1.2,0.13645440340042114
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,1,4,power_law_1.2,0.12634880542755128
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,1,4,power_law_1.2,0.11303039789199829
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,1,4,power_law_1.2,0.19067519903182983
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,1,4,power_law_1.2,0.21365759372711182
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,1,4,power_law_1.2,0.1820032000541687
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,1,4,power_law_1.2,0.2014847993850708
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,1,4,power_law_1.2,0.20177919864654542
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,1,4,power_law_1.2,0.21964800357818604
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,1,4,power_law_1.2,0.27099521160125734
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,1,4,power_law_1.2,0.332806396484375
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,1,4,power_law_1.2,0.4512320041656494
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,1,4,power_law_1.2,0.5760767936706543
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,1,32,power_law_1.01,0.061977601051330565
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,1,32,power_law_1.01,0.05552639961242676
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,1,32,power_law_1.01,0.05311359763145447
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,1,32,power_law_1.01,0.05300480127334595
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,1,32,power_law_1.01,0.05384960174560547
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,1,32,power_law_1.01,0.053363198041915895
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,1,32,power_law_1.01,0.05297279953956604
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,1,32,power_law_1.01,0.05375360250473023
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,1,32,power_law_1.01,0.05496320128440857
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,1,32,power_law_1.01,0.05459200143814087
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,1,32,power_law_1.01,0.0572160005569458
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,1,32,power_law_1.01,0.05664640069007874
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,1,32,power_law_1.01,0.056601601839065555
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,1,32,power_law_1.01,0.05868160128593445
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,1,32,power_law_1.01,0.06295679807662964
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,1,32,power_law_1.01,0.06781439781188965
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,1,32,power_law_1.01,0.07022079825401306
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,1,32,power_law_1.01,0.07837439775466919
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,1,32,power_law_1.01,0.08492799997329711
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,1,32,power_law_1.01,0.1019711971282959
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,1,32,power_law_1.01,0.12434560060501099
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,1,32,power_law_1.01,0.1477247953414917
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,1,32,power_law_1.01,0.1730239987373352
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,1,32,power_law_1.01,0.21420159339904785
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,1,32,power_law_1.01,0.26574718952178955
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,1,32,power_law_1.01,0.3724800109863281
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,1,32,power_law_1.01,0.4631807804107666
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,1,4,balanced,0.04410133262475332
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,1,4,balanced,0.05406400064627329
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,1,4,balanced,0.07092800239721934
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,1,4,balanced,0.10931733250617981
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,1,4,balanced,0.1713119943936666
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,1,4,balanced,0.29227733612060547
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,1,4,balanced,0.29291200637817383
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,1,4,balanced,0.29552533229192096
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,1,4,balanced,0.2955893278121948
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,1,4,balanced,0.2962453365325928
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,1,4,balanced,0.2980639934539795
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,1,4,balanced,0.29894399642944336
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,1,4,balanced,0.30191999673843384
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,1,4,balanced,0.3014346758524577
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,1,4,balanced,0.30914666255315143
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,1,4,balanced,0.30932799975077313
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,1,4,balanced,0.3139093319574992
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,1,4,balanced,0.32396266857783
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,1,4,balanced,0.33420801162719727
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,1,4,balanced,0.35207998752593994
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,1,4,balanced,0.37970133622487384
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,1,4,balanced,0.42974400520324707
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,1,4,balanced,0.4421813488006592
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,1,4,balanced,0.5515199899673462
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,1,4,balanced,0.6032106479008993
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,1,4,balanced,0.8267892996470133
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,1,4,balanced,0.9375733534495035
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,2,4,balanced,0.04192000130812327
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,2,4,balanced,0.04401599864164988
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,2,4,balanced,0.04571199913819631
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,2,4,balanced,0.056874667604764305
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,2,4,balanced,0.08060266574223836
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,2,4,balanced,0.11691199739774068
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,2,4,balanced,0.11637866497039795
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,2,4,balanced,0.11682666341463725
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,2,4,balanced,0.11766933401425679
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,2,4,balanced,0.11779200037320454
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,2,4,balanced,0.11795733372370402
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,2,4,balanced,0.12164800365765889
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,2,4,balanced,0.12392533818880717
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,2,4,balanced,0.1230560044447581
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,2,4,balanced,0.13293332854906717
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,2,4,balanced,0.13356799880663553
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,2,4,balanced,0.1369493305683136
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,2,4,balanced,0.15269866585731506
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,2,4,balanced,0.15779200196266174
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,2,4,balanced,0.18191999197006226
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,2,4,balanced,0.2041813333829244
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,2,4,balanced,0.24685867627461752
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,2,4,balanced,0.2930826743443807
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,2,4,balanced,0.3925600051879883
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,2,4,balanced,0.46994133790334064
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,2,4,balanced,0.6709706783294678
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,2,4,balanced,0.8379840056101481
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,2,64,balanced,0.01941866676012675
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,2,64,balanced,0.019274666905403137
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,2,64,balanced,0.01969066634774208
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,2,64,balanced,0.020901332298914593
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,2,64,balanced,0.02107733239730199
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,2,64,balanced,0.023381332556406658
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,2,64,balanced,0.0373333344856898
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,2,64,balanced,0.03741333385308584
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,2,64,balanced,0.03363200028737386
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,2,64,balanced,0.0340639998515447
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,2,64,balanced,0.03429333368937174
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,2,64,balanced,0.033941333492596946
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,2,64,balanced,0.03243733445803324
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,2,64,balanced,0.031717332700888314
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,2,64,balanced,0.039333333571751915
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,2,64,balanced,0.04067733387152354
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,2,64,balanced,0.03630933413902918
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,2,64,balanced,0.03989866624275843
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,2,64,balanced,0.04788800080617269
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,2,64,balanced,0.05417066812515259
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,2,64,balanced,0.06783466537793477
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,2,64,balanced,0.08654933174451192
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,2,64,balanced,0.10781332850456238
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,2,64,balanced,0.14454933007558188
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,2,64,balanced,0.18332266807556152
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,2,64,balanced,0.2584799925486247
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,2,64,balanced,0.33613868554433185
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,8,32,balanced,0.04374399781227112
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,8,32,balanced,0.039674667020638786
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,8,32,balanced,0.04364266494909922
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,8,32,balanced,0.041573333243529
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,8,32,balanced,0.041573333243529
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,8,32,balanced,0.043935999274253845
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,8,32,balanced,0.04609066744645437
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,8,32,balanced,0.04558399816354116
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,8,32,balanced,0.04610666632652283
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,8,32,balanced,0.04692799846331278
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,8,32,balanced,0.04565866788228353
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,8,32,balanced,0.04735999802748362
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,8,32,balanced,0.047872001926104225
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,8,32,balanced,0.04805333415667216
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,8,32,balanced,0.05171200136343638
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,8,32,balanced,0.052202666799227394
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,8,32,balanced,0.055760001142819725
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,8,32,balanced,0.058320000767707825
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,8,32,balanced,0.061792001128196716
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,8,32,balanced,0.0688266654809316
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,8,32,balanced,0.07670400043328603
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,8,32,balanced,0.09066667159398396
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,8,32,balanced,0.10310399532318115
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,8,32,balanced,0.13433067003885904
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,8,32,balanced,0.16789867480595908
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,8,32,balanced,0.22518932819366455
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,8,32,balanced,0.28723732630411786
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,8,1,power_law_1.01,0.08044800162315369
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,8,1,power_law_1.01,0.08377599716186523
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,8,1,power_law_1.01,0.08819839954376221
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,8,1,power_law_1.01,0.10257279872894287
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,8,1,power_law_1.01,0.11936639547348023
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,8,1,power_law_1.01,0.13813120126724243
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,8,1,power_law_1.01,0.157695996761322
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,8,1,power_law_1.01,0.1628991961479187
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,8,1,power_law_1.01,0.16824320554733277
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,8,1,power_law_1.01,0.17121920585632325
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,8,1,power_law_1.01,0.17772159576416016
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,8,1,power_law_1.01,0.18524800539016723
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,8,1,power_law_1.01,0.18533120155334473
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,8,1,power_law_1.01,0.19251199960708618
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,8,1,power_law_1.01,0.2009727954864502
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,8,1,power_law_1.01,0.2067647933959961
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,8,1,power_law_1.01,0.22257919311523439
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,8,1,power_law_1.01,0.2555520057678223
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,8,1,power_law_1.01,0.28022398948669436
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,8,1,power_law_1.01,0.3471231937408447
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,8,1,power_law_1.01,0.4134528160095215
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,8,1,power_law_1.01,0.5425536155700683
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,8,1,power_law_1.01,0.6703040122985839
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,8,1,power_law_1.01,0.9362431526184082
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,8,1,power_law_1.01,1.1970560073852539
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,8,1,power_law_1.01,1.7286592483520509
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,8,1,power_law_1.01,2.256377601623535
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,4,16,balanced,0.019050666441520054
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,4,16,balanced,0.01904533306757609
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,4,16,balanced,0.019082666685183842
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,4,16,balanced,0.021007999777793884
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,4,16,balanced,0.02141333371400833
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,4,16,balanced,0.0252960001428922
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,4,16,balanced,0.027386667827765148
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,4,16,balanced,0.027232001225153606
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,4,16,balanced,0.03222399950027466
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,4,16,balanced,0.03290133426586787
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,4,16,balanced,0.03133333226044973
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,4,16,balanced,0.043765331308046974
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,4,16,balanced,0.04287999868392944
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,4,16,balanced,0.0415786678592364
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,4,16,balanced,0.049695998430252075
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,4,16,balanced,0.04957866668701172
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,4,16,balanced,0.047055999437967934
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,4,16,balanced,0.050698667764663696
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,4,16,balanced,0.05569600065549215
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,4,16,balanced,0.06648533542950948
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,4,16,balanced,0.07417599856853485
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,4,16,balanced,0.090421328941981
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,4,16,balanced,0.10629866520563762
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,4,16,balanced,0.13351999719937643
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,4,16,balanced,0.16133866707483926
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,4,16,balanced,0.23569599787394205
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,4,16,balanced,0.29598933458328247
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,1,16,power_law_1.2,0.06492159962654113
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,1,16,power_law_1.2,0.07763199806213379
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,1,16,power_law_1.2,0.07343360185623168
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,1,16,power_law_1.2,0.07554559707641602
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,1,16,power_law_1.2,0.07610880136489868
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,1,16,power_law_1.2,0.06626560091972351
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,1,16,power_law_1.2,0.07472000122070313
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,1,16,power_law_1.2,0.07498239874839782
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,1,16,power_law_1.2,0.0759552001953125
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,1,16,power_law_1.2,0.07334399819374085
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,1,16,power_law_1.2,0.07747840285301208
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,1,16,power_law_1.2,0.07961599826812744
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,1,16,power_law_1.2,0.07827200293540955
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,1,16,power_law_1.2,0.08417919874191285
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,1,16,power_law_1.2,0.09147520065307617
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,1,16,power_law_1.2,0.09763839840888977
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,1,16,power_law_1.2,0.09844480156898498
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,1,16,power_law_1.2,0.11542400121688842
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,1,16,power_law_1.2,0.11461119651794434
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,1,16,power_law_1.2,0.14006400108337402
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,1,16,power_law_1.2,0.15340800285339357
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,1,16,power_law_1.2,0.19528959989547728
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,1,16,power_law_1.2,0.24182400703430176
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,1,16,power_law_1.2,0.31303040981292723
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,1,16,power_law_1.2,0.3862720012664795
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,1,16,power_law_1.2,0.5573823928833008
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,1,16,power_law_1.2,0.6804800033569336
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,2,8,balanced,0.041877334316571556
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,2,8,balanced,0.04400533437728882
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,2,8,balanced,0.04570133487383524
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,2,8,balanced,0.057189335425694786
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,2,8,balanced,0.07650133470694225
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,2,8,balanced,0.11623467008272807
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,2,8,balanced,0.11652800440788269
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,2,8,balanced,0.11026133100191753
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,2,8,balanced,0.11340266466140747
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,2,8,balanced,0.11226133505503337
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,2,8,balanced,0.11408000191052754
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,2,8,balanced,0.11556800206502278
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,2,8,balanced,0.1156160036722819
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,2,8,balanced,0.11560533444086711
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,2,8,balanced,0.12268267075220744
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,2,8,balanced,0.12181867162386577
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,2,8,balanced,0.12457600235939026
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,2,8,balanced,0.13318933049837747
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,2,8,balanced,0.13378666838010153
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,2,8,balanced,0.14868799845377603
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,2,8,balanced,0.15930666526158652
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,2,8,balanced,0.185808002948761
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,2,8,balanced,0.20172266165415445
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,2,8,balanced,0.2579999963442485
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,2,8,balanced,0.3075946569442749
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,2,8,balanced,0.41255998611450195
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,2,8,balanced,0.5031199852625529
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,1,256,power_law_1.2,0.062003201246261595
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,1,256,power_law_1.2,0.06586880087852479
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,1,256,power_law_1.2,0.04337919950485229
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,1,256,power_law_1.2,0.04351359903812409
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,1,256,power_law_1.2,0.04575360119342804
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,1,256,power_law_1.2,0.045772799849510194
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,1,256,power_law_1.2,0.04624640047550201
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,1,256,power_law_1.2,0.04366079866886139
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,1,256,power_law_1.2,0.044435200095176694
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,1,256,power_law_1.2,0.044659200310707095
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,1,256,power_law_1.2,0.04496000111103058
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,1,256,power_law_1.2,0.0462336003780365
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,1,256,power_law_1.2,0.0466623991727829
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,1,256,power_law_1.2,0.04831359982490539
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,1,256,power_law_1.2,0.05370879769325256
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,1,256,power_law_1.2,0.05551999807357788
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,1,256,power_law_1.2,0.05763840079307556
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,1,256,power_law_1.2,0.06513919830322265
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,1,256,power_law_1.2,0.07181439995765686
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,1,256,power_law_1.2,0.08728320002555848
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,1,256,power_law_1.2,0.0951807975769043
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,1,256,power_law_1.2,0.13024640083312988
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,1,256,power_law_1.2,0.1454848051071167
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,1,256,power_law_1.2,0.21816320419311525
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,1,256,power_law_1.2,0.2771903991699219
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,1,256,power_law_1.2,0.4158207893371582
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,1,256,power_law_1.2,0.478547191619873
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,4,16,power_law_1.2,0.018598400056362152
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,4,16,power_law_1.2,0.01993599981069565
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,4,16,power_law_1.2,0.020230400562286376
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,4,16,power_law_1.2,0.019942399859428406
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,4,16,power_law_1.2,0.021209600567817687
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,4,16,power_law_1.2,0.021996800601482392
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,4,16,power_law_1.2,0.024134400486946105
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,4,16,power_law_1.2,0.02476799935102463
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,4,16,power_law_1.2,0.02874239981174469
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,4,16,power_law_1.2,0.027577599883079527
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,4,16,power_law_1.2,0.02876160144805908
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,4,16,power_law_1.2,0.03930239975452423
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,4,16,power_law_1.2,0.040556800365447995
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,4,16,power_law_1.2,0.04058879911899567
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,4,16,power_law_1.2,0.04364160001277924
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,4,16,power_law_1.2,0.043584001064300534
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,4,16,power_law_1.2,0.04323199987411499
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,4,16,power_law_1.2,0.04441600143909454
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,4,16,power_law_1.2,0.04509440064430237
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,4,16,power_law_1.2,0.05495039820671081
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,4,16,power_law_1.2,0.0574400007724762
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,4,16,power_law_1.2,0.07029119729995728
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,4,16,power_law_1.2,0.08049920201301575
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,4,16,power_law_1.2,0.10517120361328125
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,4,16,power_law_1.2,0.1324671983718872
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,4,16,power_law_1.2,0.17375359535217286
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,4,16,power_law_1.2,0.23682560920715331
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,4,16,power_law_1.01,0.047142401337623596
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,4,16,power_law_1.01,0.05092480182647705
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,4,16,power_law_1.01,0.048588800430297854
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,4,16,power_law_1.01,0.051097601652145386
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,4,16,power_law_1.01,0.05196800231933594
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,4,16,power_law_1.01,0.054425597190856934
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,4,16,power_law_1.01,0.055052798986434934
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,4,16,power_law_1.01,0.05514240264892578
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,4,16,power_law_1.01,0.05429120063781738
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,4,16,power_law_1.01,0.05539839863777161
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,4,16,power_law_1.01,0.05575680136680603
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,4,16,power_law_1.01,0.0581055998802185
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,4,16,power_law_1.01,0.058220797777175905
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,4,16,power_law_1.01,0.06175360083580017
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,4,16,power_law_1.01,0.06903039813041686
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,4,16,power_law_1.01,0.0713599979877472
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,4,16,power_law_1.01,0.07388160228729249
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,4,16,power_law_1.01,0.08037760257720947
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,4,16,power_law_1.01,0.0906175971031189
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,4,16,power_law_1.01,0.10602240562438965
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,4,16,power_law_1.01,0.12026879787445069
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,4,16,power_law_1.01,0.1605631947517395
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,4,16,power_law_1.01,0.19173120260238646
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,4,16,power_law_1.01,0.26090240478515625
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,4,16,power_law_1.01,0.33573760986328127
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,4,16,power_law_1.01,0.470143985748291
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,4,16,power_law_1.01,0.637286376953125
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,1,32,balanced,0.04387199878692627
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,1,32,balanced,0.046309332052866616
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,1,32,balanced,0.0451200008392334
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,1,32,balanced,0.04731733103593191
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,1,32,balanced,0.048026666045188904
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,1,32,balanced,0.06439466774463654
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,1,32,balanced,0.06611733138561249
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,1,32,balanced,0.0666133314371109
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,1,32,balanced,0.06677866478761037
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,1,32,balanced,0.06809600194295247
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,1,32,balanced,0.06963199873765309
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,1,32,balanced,0.0684746652841568
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,1,32,balanced,0.07049066821734111
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,1,32,balanced,0.07074133555094402
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,1,32,balanced,0.07484266658624013
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,1,32,balanced,0.0775786687930425
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,1,32,balanced,0.07906666894753774
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,1,32,balanced,0.08475733796755473
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,1,32,balanced,0.09136000275611877
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,1,32,balanced,0.10201066732406616
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,1,32,balanced,0.11289067069689433
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,1,32,balanced,0.13329600294431052
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,1,32,balanced,0.15728533267974854
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,1,32,balanced,0.20221867163976034
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,1,32,balanced,0.24237332741419473
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,1,32,balanced,0.33157867193222046
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,1,32,balanced,0.4163626829783122
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,4,64,power_law_1.2,0.05663999915122986
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,4,64,power_law_1.2,0.0474368005990982
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,4,64,power_law_1.2,0.046374401450157164
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,4,64,power_law_1.2,0.04692479968070984
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,4,64,power_law_1.2,0.04699519872665405
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,4,64,power_law_1.2,0.04754559993743897
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,4,64,power_law_1.2,0.04805119931697845
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,4,64,power_law_1.2,0.04864639937877655
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,4,64,power_law_1.2,0.04956159889698029
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,4,64,power_law_1.2,0.04912639856338501
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,4,64,power_law_1.2,0.0499455988407135
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,4,64,power_law_1.2,0.0506496012210846
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,4,64,power_law_1.2,0.051481598615646364
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,4,64,power_law_1.2,0.052147197723388675
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,4,64,power_law_1.2,0.05768960118293762
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,4,64,power_law_1.2,0.05809919834136963
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,4,64,power_law_1.2,0.06103039979934692
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,4,64,power_law_1.2,0.06845440268516541
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,4,64,power_law_1.2,0.0732800006866455
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,4,64,power_law_1.2,0.08652799725532531
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,4,64,power_law_1.2,0.09436799883842469
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,4,64,power_law_1.2,0.1183616042137146
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,4,64,power_law_1.2,0.1348863959312439
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,4,64,power_law_1.2,0.19360640048980712
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,4,64,power_law_1.2,0.23324160575866698
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,4,64,power_law_1.2,0.35274879932403563
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,4,64,power_law_1.2,0.45635199546813965
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,4,4,balanced,0.043925335009892784
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,4,4,balanced,0.046053335070610046
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,4,4,balanced,0.04725866516431173
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,4,4,balanced,0.057429333527882896
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,4,4,balanced,0.07737066845099132
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,4,4,balanced,0.11733866731325786
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,4,4,balanced,0.11964799960454305
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,4,4,balanced,0.11390399932861328
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,4,4,balanced,0.11787733435630798
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,4,4,balanced,0.11541333794593811
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,4,4,balanced,0.11694399515787761
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,4,4,balanced,0.11939733227094014
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,4,4,balanced,0.12109866738319397
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,4,4,balanced,0.12012799580891927
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,4,4,balanced,0.1277653376261393
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,4,4,balanced,0.1283680001894633
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,4,4,balanced,0.13058666388193765
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,4,4,balanced,0.14061333735783896
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,4,4,balanced,0.14548800388971964
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,4,4,balanced,0.16397333145141602
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,4,4,balanced,0.17795199155807495
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,4,4,balanced,0.21401600042978922
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,4,4,balanced,0.2453493277231852
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,4,4,balanced,0.32419733206431073
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,4,4,balanced,0.38125868638356525
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,4,4,balanced,0.533951997756958
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,4,4,balanced,0.6606880029042562
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,2,16,balanced,0.05012799799442291
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,2,16,balanced,0.05086400111516317
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,2,16,balanced,0.05146666864554087
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,2,16,balanced,0.05384533107280731
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,2,16,balanced,0.058277333776156105
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,2,16,balanced,0.07323200007279713
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,2,16,balanced,0.07479466497898102
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,2,16,balanced,0.07702933251857758
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,2,16,balanced,0.07444266478220622
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,2,16,balanced,0.0767680009206136
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,2,16,balanced,0.07640533149242401
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,2,16,balanced,0.07821866869926453
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,2,16,balanced,0.08063999811808269
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,2,16,balanced,0.08014933268229167
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,2,16,balanced,0.08527466654777527
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,2,16,balanced,0.0869653324286143
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,2,16,balanced,0.08854400118192036
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,2,16,balanced,0.093231995900472
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,2,16,balanced,0.10030933221181233
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,2,16,balanced,0.11173866192499797
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,2,16,balanced,0.12073066830635071
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,2,16,balanced,0.14615999658902487
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,2,16,balanced,0.16481600205103555
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,2,16,balanced,0.22166399161020914
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,2,16,balanced,0.2674986720085144
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,2,16,balanced,0.3733919858932495
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,2,16,balanced,0.4714346726735433
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,2,4,balanced,0.02142400046189626
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,2,4,balanced,0.023221333821614582
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,2,4,balanced,0.023445333043734234
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,2,4,balanced,0.02940266579389572
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,2,4,balanced,0.04163199911514918
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,2,4,balanced,0.0592853327592214
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,2,4,balanced,0.0614879975716273
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,2,4,balanced,0.06337066491444905
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,2,4,balanced,0.06513600051403046
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,2,4,balanced,0.06817066669464111
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,2,4,balanced,0.06788266698519389
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,2,4,balanced,0.07223999996980031
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,2,4,balanced,0.07435733576615651
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,2,4,balanced,0.08152533570925395
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,2,4,balanced,0.08266133566697438
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,2,4,balanced,0.08265600105126698
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,2,4,balanced,0.08894399801890056
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,2,4,balanced,0.099263995885849
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,2,4,balanced,0.11293866237004598
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,2,4,balanced,0.1388320028781891
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,2,4,balanced,0.18275733788808188
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,2,4,balanced,0.19711466630299887
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,2,4,balanced,0.2121653358141581
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,2,4,balanced,0.22635199626286825
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,2,4,balanced,0.24893865982691446
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,2,4,balanced,0.4158506790796916
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,2,4,balanced,0.4480746587117513
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,8,4,power_law_1.2,0.05568000078201294
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,8,4,power_law_1.2,0.058796799182891844
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,8,4,power_law_1.2,0.05859839916229248
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,8,4,power_law_1.2,0.06286079883575439
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,8,4,power_law_1.2,0.06589440107345582
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,8,4,power_law_1.2,0.07327359914779663
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,8,4,power_law_1.2,0.07319039702415467
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,8,4,power_law_1.2,0.07932159900665284
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,8,4,power_law_1.2,0.07633280158042907
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,8,4,power_law_1.2,0.08175359964370728
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,8,4,power_law_1.2,0.08015999794006348
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,8,4,power_law_1.2,0.0824895977973938
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,8,4,power_law_1.2,0.08418560028076172
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,8,4,power_law_1.2,0.08934400081634522
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,8,4,power_law_1.2,0.09527040123939515
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,8,4,power_law_1.2,0.09850239753723145
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,8,4,power_law_1.2,0.10024960041046142
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,8,4,power_law_1.2,0.11589759588241577
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,8,4,power_law_1.2,0.12533119916915894
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,8,4,power_law_1.2,0.15673600435256957
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,8,4,power_law_1.2,0.16785919666290283
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,8,4,power_law_1.2,0.22678399085998535
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,8,4,power_law_1.2,0.26935040950775146
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,8,4,power_law_1.2,0.37971839904785154
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,8,4,power_law_1.2,0.4635200023651123
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,8,4,power_law_1.2,0.6743167877197266
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,8,4,power_law_1.2,0.9440959930419922
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,1,1,power_law_1.2,0.06629120111465454
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,1,1,power_law_1.2,0.08609279990196228
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,1,1,power_law_1.2,0.11268479824066162
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,1,1,power_law_1.2,0.1601855993270874
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,1,1,power_law_1.2,0.21100161075592042
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,1,1,power_law_1.2,0.2849152088165283
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,1,1,power_law_1.2,0.3969727993011475
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,1,1,power_law_1.2,0.42855038642883303
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,1,1,power_law_1.2,0.4383808135986328
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,1,1,power_law_1.2,0.4583104133605957
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,1,1,power_law_1.2,0.46869759559631347
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,1,1,power_law_1.2,0.48204798698425294
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,1,1,power_law_1.2,0.49556479454040525
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,1,1,power_law_1.2,0.5251776218414307
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,1,1,power_law_1.2,0.5438591957092285
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,1,1,power_law_1.2,0.5633344173431396
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,1,1,power_law_1.2,0.5808127880096435
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,1,1,power_law_1.2,0.6596352100372315
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,1,1,power_law_1.2,0.6704127788543701
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,1,1,power_law_1.2,0.7816703796386719
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,1,1,power_law_1.2,0.8088704109191894
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,1,1,power_law_1.2,0.9519424438476562
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,1,1,power_law_1.2,1.1236096382141114
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,1,1,power_law_1.2,1.4348671913146973
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,1,1,power_law_1.2,1.7110271453857422
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,1,1,power_law_1.2,2.325734329223633
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,1,1,power_law_1.2,2.9452415466308595
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,1,1,power_law_1.01,0.0632960021495819
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,1,1,power_law_1.01,0.08475520014762879
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,1,1,power_law_1.01,0.10958720445632934
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,1,1,power_law_1.01,0.1725119948387146
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,1,1,power_law_1.01,0.2365056037902832
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,1,1,power_law_1.01,0.3065471887588501
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,1,1,power_law_1.01,0.405401611328125
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,1,1,power_law_1.01,0.42700800895690916
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,1,1,power_law_1.01,0.44182400703430175
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,1,1,power_law_1.01,0.4506495952606201
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,1,1,power_law_1.01,0.47574400901794434
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,1,1,power_law_1.01,0.48647680282592776
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,1,1,power_law_1.01,0.49015040397644044
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,1,1,power_law_1.01,0.5077375888824462
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,1,1,power_law_1.01,0.5227839946746826
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,1,1,power_law_1.01,0.523744010925293
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,1,1,power_law_1.01,0.5626239776611328
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,1,1,power_law_1.01,0.6374527931213378
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,1,1,power_law_1.01,0.6476863861083985
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,1,1,power_law_1.01,0.7515520095825196
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,1,1,power_law_1.01,0.7691775798797608
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,1,1,power_law_1.01,0.9140480041503907
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,1,1,power_law_1.01,1.0724672317504882
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,1,1,power_law_1.01,1.3845312118530273
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,1,1,power_law_1.01,1.6644672393798827
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,1,1,power_law_1.01,2.2719295501708983
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,1,1,power_law_1.01,2.9033023834228517
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,1,2,power_law_1.2,0.05796480178833008
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,1,2,power_law_1.2,0.07783679962158203
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,1,2,power_law_1.2,0.0874176025390625
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,1,2,power_law_1.2,0.1162943959236145
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,1,2,power_law_1.2,0.1541632056236267
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,1,2,power_law_1.2,0.17830400466918944
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,1,2,power_law_1.2,0.22394239902496338
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,1,2,power_law_1.2,0.2424384117126465
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,1,2,power_law_1.2,0.24690558910369872
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,1,2,power_law_1.2,0.24936320781707763
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,1,2,power_law_1.2,0.25635199546813964
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,1,2,power_law_1.2,0.27820799350738523
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,1,2,power_law_1.2,0.2737855911254883
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,1,2,power_law_1.2,0.2890752077102661
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,1,2,power_law_1.2,0.2920192003250122
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,1,2,power_law_1.2,0.315116810798645
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,1,2,power_law_1.2,0.3318655967712402
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,1,2,power_law_1.2,0.37331199645996094
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,1,2,power_law_1.2,0.38000640869140623
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,1,2,power_law_1.2,0.4469183921813965
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,1,2,power_law_1.2,0.47166080474853517
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,1,2,power_law_1.2,0.5736576080322265
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,1,2,power_law_1.2,0.6641600131988525
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,1,2,power_law_1.2,0.8444095611572265
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,1,2,power_law_1.2,1.0200639724731446
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,1,2,power_law_1.2,1.4391551971435548
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,1,2,power_law_1.2,1.7915327072143554
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,1,64,balanced,0.04577066500981649
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,1,64,balanced,0.04588800172011057
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,1,64,balanced,0.045706664522488914
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,1,64,balanced,0.047744000951449074
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,1,64,balanced,0.05012266834576925
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,1,64,balanced,0.05006400247414907
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,1,64,balanced,0.052000001072883606
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,1,64,balanced,0.053861334919929504
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,1,64,balanced,0.056186666091283165
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,1,64,balanced,0.05831466615200043
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,1,64,balanced,0.05596266686916351
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,1,64,balanced,0.05390933156013489
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,1,64,balanced,0.05707733333110809
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,1,64,balanced,0.058304001887639366
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,1,64,balanced,0.06611200173695882
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,1,64,balanced,0.06452266871929169
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,1,64,balanced,0.06990933418273926
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,1,64,balanced,0.07441600163777669
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,1,64,balanced,0.07875733574231465
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,1,64,balanced,0.08880000313123067
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,1,64,balanced,0.09701333443323772
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,1,64,balanced,0.12149866422017415
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,1,64,balanced,0.13643733660380045
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,1,64,balanced,0.18317866325378418
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,1,64,balanced,0.22404799858729044
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,1,64,balanced,0.3144426743189494
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,1,64,balanced,0.39237332344055176
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,4,1,balanced,0.13899200161298117
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,4,1,balanced,0.14460800091425577
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,4,1,balanced,0.1513866682847341
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,4,1,balanced,0.16691199938456217
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,4,1,balanced,0.20126932859420776
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,4,1,balanced,0.27381332715352374
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,4,1,balanced,0.2794933319091797
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,4,1,balanced,0.2821066578229268
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,4,1,balanced,0.280074675877889
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,4,1,balanced,0.28457067410151166
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,4,1,balanced,0.28799466292063397
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,4,1,balanced,0.2941333254178365
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,4,1,balanced,0.3012160062789917
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,4,1,balanced,0.3016693393389384
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,4,1,balanced,0.30006933212280273
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,4,1,balanced,0.30714666843414307
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,4,1,balanced,0.31668267647425336
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,4,1,balanced,0.34523733456929523
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,4,1,balanced,0.37191998958587646
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,4,1,balanced,0.43036266167958576
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,4,1,balanced,0.49369601408640545
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,4,1,balanced,0.5789386828740438
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,4,1,balanced,0.6835786501566569
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,4,1,balanced,0.9837226867675781
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,4,1,balanced,1.2011306285858154
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,4,1,balanced,1.725370724995931
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,4,1,balanced,2.2673865954081216
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,4,2,balanced,0.046240001916885376
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,4,2,balanced,0.048021331429481506
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,4,2,balanced,0.05961066484451294
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,4,2,balanced,0.0769760012626648
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,4,2,balanced,0.11271466811498006
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,4,2,balanced,0.1736533244450887
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,4,2,balanced,0.1767253279685974
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,4,2,balanced,0.17919466892878214
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,4,2,balanced,0.1800160010655721
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,4,2,balanced,0.18195732434590658
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,4,2,balanced,0.17989865938822427
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,4,2,balanced,0.18238933881123862
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,4,2,balanced,0.18683199087778726
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,4,2,balanced,0.1876373291015625
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,4,2,balanced,0.19292799631754556
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,4,2,balanced,0.1975626746813456
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,4,2,balanced,0.2010293404261271
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,4,2,balanced,0.2153173287709554
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,4,2,balanced,0.22612800200780234
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,4,2,balanced,0.2563413381576538
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,4,2,balanced,0.276309331258138
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,4,2,balanced,0.34622931480407715
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,4,2,balanced,0.3869493405024211
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,4,2,balanced,0.5112906694412231
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,4,2,balanced,0.6050399939219157
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,4,2,balanced,0.854640007019043
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,4,2,balanced,1.0650986830393474
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,1,256,power_law_1.2,0.04657920002937317
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,1,256,power_law_1.2,0.04588800072669983
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,1,256,power_law_1.2,0.04604800045490265
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,1,256,power_law_1.2,0.048204800486564635
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,1,256,power_law_1.2,0.04902400076389313
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,1,256,power_law_1.2,0.049779200553894044
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,1,256,power_law_1.2,0.05015040040016174
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,1,256,power_law_1.2,0.05106559991836548
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,1,256,power_law_1.2,0.05140479803085327
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,1,256,power_law_1.2,0.05178880095481873
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,1,256,power_law_1.2,0.05237119793891907
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,1,256,power_law_1.2,0.053439998626708986
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,1,256,power_law_1.2,0.05333120226860046
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,1,256,power_law_1.2,0.05429760217666626
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,1,256,power_law_1.2,0.05740159749984741
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,1,256,power_law_1.2,0.05878400206565857
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,1,256,power_law_1.2,0.06159999966621399
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,1,256,power_law_1.2,0.06590719819068909
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,1,256,power_law_1.2,0.07296000123023987
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,1,256,power_law_1.2,0.08584319949150085
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,1,256,power_law_1.2,0.0997759997844696
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,1,256,power_law_1.2,0.12417919635772705
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,1,256,power_law_1.2,0.16497279405593873
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,1,256,power_law_1.2,0.2305216073989868
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,1,256,power_law_1.2,0.28675839900970457
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,1,256,power_law_1.2,0.435148811340332
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,1,256,power_law_1.2,0.5624639987945557
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,2,1,balanced,0.021322667598724365
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,2,1,balanced,0.023237332701683044
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,2,1,balanced,0.027509334186712902
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,2,1,balanced,0.04083200047413508
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,2,1,balanced,0.06107733150323232
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,2,1,balanced,0.09794132908185323
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,2,1,balanced,0.10015466809272766
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,2,1,balanced,0.10260267059008281
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,2,1,balanced,0.10528533657391866
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,2,1,balanced,0.10754666725794475
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,2,1,balanced,0.10782399773597717
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,2,1,balanced,0.11153599619865417
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,2,1,balanced,0.11556266744931538
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,2,1,balanced,0.12225600083669026
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,2,1,balanced,0.1297760009765625
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,2,1,balanced,0.13819733262062073
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,2,1,balanced,0.1423733333746592
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,2,1,balanced,0.17156267166137695
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,2,1,balanced,0.16247999668121338
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,2,1,balanced,0.22004799048105875
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,2,1,balanced,0.21359467506408691
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,2,1,balanced,0.2807413339614868
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,2,1,balanced,0.26731733481089276
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,2,1,balanced,0.36212265491485596
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,2,1,balanced,0.44069333871205646
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,2,1,balanced,0.6315253178278605
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,2,1,balanced,0.8226400216420492
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,4,1,power_law_1.2,0.09103999733924865
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,4,1,power_law_1.2,0.09901440143585205
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,4,1,power_law_1.2,0.11463040113449097
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,4,1,power_law_1.2,0.14549119472503663
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,4,1,power_law_1.2,0.16977280378341675
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,4,1,power_law_1.2,0.19805439710617065
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,4,1,power_law_1.2,0.2476032018661499
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,4,1,power_law_1.2,0.2585472106933594
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,4,1,power_law_1.2,0.2651456117630005
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,4,1,power_law_1.2,0.27042560577392577
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,4,1,power_law_1.2,0.27552640438079834
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,4,1,power_law_1.2,0.2803967952728271
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,4,1,power_law_1.2,0.289516806602478
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,4,1,power_law_1.2,0.3002432107925415
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,4,1,power_law_1.2,0.3084800004959106
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,4,1,power_law_1.2,0.31868159770965576
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,4,1,power_law_1.2,0.3297663927078247
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,4,1,power_law_1.2,0.37582719326019287
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,4,1,power_law_1.2,0.4167168140411377
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,4,1,power_law_1.2,0.4892672061920166
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,4,1,power_law_1.2,0.5601088047027588
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,4,1,power_law_1.2,0.7075263977050781
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,4,1,power_law_1.2,0.857472038269043
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,4,1,power_law_1.2,1.1494720458984375
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,4,1,power_law_1.2,1.436019229888916
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,4,1,power_law_1.2,2.00949764251709
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,4,1,power_law_1.2,2.5739072799682616
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,2,32,power_law_1.01,0.02250880002975464
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,2,32,power_law_1.01,0.022412799298763275
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,2,32,power_law_1.01,0.021542400121688843
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,2,32,power_law_1.01,0.020044800639152528
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,2,32,power_law_1.01,0.020793600380420683
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,2,32,power_law_1.01,0.02072319984436035
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,2,32,power_law_1.01,0.021087999641895293
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,2,32,power_law_1.01,0.023571200668811798
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,2,32,power_law_1.01,0.023180800676345825
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,2,32,power_law_1.01,0.023455999791622162
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,2,32,power_law_1.01,0.029580798745155335
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,2,32,power_law_1.01,0.029798400402069092
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,2,32,power_law_1.01,0.03051519989967346
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,2,32,power_law_1.01,0.04009599983692169
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,2,32,power_law_1.01,0.039110401272773744
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,2,32,power_law_1.01,0.040031999349594116
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,2,32,power_law_1.01,0.04434559941291809
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,2,32,power_law_1.01,0.04750719964504242
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,2,32,power_law_1.01,0.05672320127487183
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,2,32,power_law_1.01,0.0668287992477417
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,2,32,power_law_1.01,0.05360640287399292
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,2,32,power_law_1.01,0.07179520130157471
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,2,32,power_law_1.01,0.07900159955024719
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,2,32,power_law_1.01,0.1029695987701416
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,2,32,power_law_1.01,0.12511359453201293
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,2,32,power_law_1.01,0.18450560569763183
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,2,32,power_law_1.01,0.2329472064971924
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,8,4,power_law_1.2,0.01796479970216751
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,8,4,power_law_1.2,0.0193792000412941
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,8,4,power_law_1.2,0.01943040043115616
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,8,4,power_law_1.2,0.022809599339962006
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,8,4,power_law_1.2,0.023500800132751465
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,8,4,power_law_1.2,0.02351360023021698
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,8,4,power_law_1.2,0.026841598749160766
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,8,4,power_law_1.2,0.0271807998418808
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,8,4,power_law_1.2,0.031327998638153075
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,8,4,power_law_1.2,0.030828800797462464
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,8,4,power_law_1.2,0.03206399977207184
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,8,4,power_law_1.2,0.03406080007553101
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,8,4,power_law_1.2,0.03356800079345703
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,8,4,power_law_1.2,0.03485440015792847
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,8,4,power_law_1.2,0.04222080111503601
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,8,4,power_law_1.2,0.04420480132102966
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,8,4,power_law_1.2,0.04379520118236542
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,8,4,power_law_1.2,0.0651968002319336
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,8,4,power_law_1.2,0.0711359977722168
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,8,4,power_law_1.2,0.08387200236320495
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,8,4,power_law_1.2,0.08587520122528076
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,8,4,power_law_1.2,0.09550079703330994
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,8,4,power_law_1.2,0.10894720554351807
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,8,4,power_law_1.2,0.1362048029899597
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,8,4,power_law_1.2,0.16305919885635375
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,8,4,power_law_1.2,0.21415040493011475
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,8,4,power_law_1.2,0.2719615936279297
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,1,32,balanced,0.04196799794832865
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,1,32,balanced,0.04275733232498169
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,1,32,balanced,0.04151466737190882
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,1,32,balanced,0.0459146648645401
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,1,32,balanced,0.05978666742642721
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,1,32,balanced,0.08124266564846039
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,1,32,balanced,0.08102400104204814
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,1,32,balanced,0.07895466685295105
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,1,32,balanced,0.07896000146865845
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,1,32,balanced,0.07893866797288258
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,1,32,balanced,0.07879999776681264
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,1,32,balanced,0.08089600006739299
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,1,32,balanced,0.08064533273379008
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,1,32,balanced,0.08082666496435802
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,1,32,balanced,0.08475200335184734
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,1,32,balanced,0.0864586631457011
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,1,32,balanced,0.0890826682249705
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,1,32,balanced,0.09409067034721375
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,1,32,balanced,0.09817066788673401
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,1,32,balanced,0.10467200477917989
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,1,32,balanced,0.11333333452542622
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,1,32,balanced,0.12923733393351236
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,1,32,balanced,0.14634133378664652
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,1,32,balanced,0.18134399255116782
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,1,32,balanced,0.20951465765635172
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,1,32,balanced,0.27699732780456543
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,1,32,balanced,0.34331734975179035
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,8,1,power_law_1.2,0.04938879907131195
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,8,1,power_law_1.2,0.052160000801086424
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,8,1,power_law_1.2,0.06050559878349304
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,8,1,power_law_1.2,0.07909759879112244
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,8,1,power_law_1.2,0.09739519953727722
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,8,1,power_law_1.2,0.11393920183181763
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,8,1,power_law_1.2,0.1477952003479004
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,8,1,power_law_1.2,0.156550395488739
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,8,1,power_law_1.2,0.1580672025680542
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,8,1,power_law_1.2,0.16462719440460205
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,8,1,power_law_1.2,0.1720703959465027
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,8,1,power_law_1.2,0.18056960105895997
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,8,1,power_law_1.2,0.1855936050415039
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,8,1,power_law_1.2,0.20363519191741944
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,8,1,power_law_1.2,0.208351993560791
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,8,1,power_law_1.2,0.21469440460205078
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,8,1,power_law_1.2,0.21690878868103028
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,8,1,power_law_1.2,0.2514303922653198
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,8,1,power_law_1.2,0.2772864103317261
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,8,1,power_law_1.2,0.3432960033416748
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,8,1,power_law_1.2,0.37843201160430906
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,8,1,power_law_1.2,0.47910399436950685
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,8,1,power_law_1.2,0.563155221939087
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,8,1,power_law_1.2,0.7406079769134521
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,8,1,power_law_1.2,0.9227007865905762
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,8,1,power_law_1.2,1.2780415534973144
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,8,1,power_law_1.2,1.658527946472168
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,8,4,power_law_1.2,0.04330880045890808
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,8,4,power_law_1.2,0.04613119959831238
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,8,4,power_law_1.2,0.046374401450157164
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,8,4,power_law_1.2,0.052902400493621826
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,8,4,power_law_1.2,0.05433599948883057
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,8,4,power_law_1.2,0.05382400155067444
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,8,4,power_law_1.2,0.06452479958534241
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,8,4,power_law_1.2,0.06588799953460693
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,8,4,power_law_1.2,0.06343039870262146
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,8,4,power_law_1.2,0.06573439836502075
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,8,4,power_law_1.2,0.06703360080718994
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,8,4,power_law_1.2,0.07226240038871765
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,8,4,power_law_1.2,0.07528960108757018
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,8,4,power_law_1.2,0.07439360022544861
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,8,4,power_law_1.2,0.08105599880218506
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,8,4,power_law_1.2,0.08611199855804444
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,8,4,power_law_1.2,0.09343360066413879
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,8,4,power_law_1.2,0.10579839944839478
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,8,4,power_law_1.2,0.11896320581436157
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,8,4,power_law_1.2,0.14263039827346802
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,8,4,power_law_1.2,0.16470400094985962
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,8,4,power_law_1.2,0.21628799438476562
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,8,4,power_law_1.2,0.2690367937088013
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,8,4,power_law_1.2,0.364902400970459
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,8,4,power_law_1.2,0.520249605178833
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,8,4,power_law_1.2,0.6949376106262207
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,8,4,power_law_1.2,0.8290047645568848
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,8,4,balanced,0.05046399931112925
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,8,4,balanced,0.052298665046691895
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,8,4,balanced,0.052373334765434265
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,8,4,balanced,0.05817066629727682
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,8,4,balanced,0.06628266473611195
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,8,4,balanced,0.078575998544693
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,8,4,balanced,0.08074666559696198
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,8,4,balanced,0.08084799846013387
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,8,4,balanced,0.08120533327261607
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,8,4,balanced,0.08061333497365315
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,8,4,balanced,0.08275733391443889
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,8,4,balanced,0.08455466230710347
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,8,4,balanced,0.08501332998275757
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,8,4,balanced,0.08684800068537395
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,8,4,balanced,0.09156266848246257
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,8,4,balanced,0.09302933017412822
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,8,4,balanced,0.09557867050170898
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,8,4,balanced,0.10613333185513814
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,8,4,balanced,0.11360533038775127
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,8,4,balanced,0.1381600002447764
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,8,4,balanced,0.15687466661135355
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,8,4,balanced,0.19216533501942953
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,8,4,balanced,0.2283253272374471
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,8,4,balanced,0.32093334197998047
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,8,4,balanced,0.396229346593221
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,8,4,balanced,0.5690666834513346
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,8,4,balanced,0.7483092943827311
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,4,8,balanced,0.05435733497142792
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,4,8,balanced,0.05624533196290334
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,4,8,balanced,0.05619200070699056
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,4,8,balanced,0.058975999553998314
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,4,8,balanced,0.06612800061702728
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,4,8,balanced,0.08109866579373677
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,4,8,balanced,0.08286400139331818
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,4,8,balanced,0.08471999565760295
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,4,8,balanced,0.08097599943478902
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,4,8,balanced,0.0841493308544159
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,4,8,balanced,0.08286933104197185
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,4,8,balanced,0.08481599887212117
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,4,8,balanced,0.08727467060089111
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,4,8,balanced,0.08661866188049316
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,4,8,balanced,0.09148800373077393
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,4,8,balanced,0.09303999940554301
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,4,8,balanced,0.0958186686038971
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,4,8,balanced,0.10079999764760335
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,4,8,balanced,0.10825600226720174
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,4,8,balanced,0.12237333257993062
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,4,8,balanced,0.1344266633192698
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,4,8,balanced,0.16169599692026773
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,4,8,balanced,0.18365333477656046
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,4,8,balanced,0.25282132625579834
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,4,8,balanced,0.3091946641604106
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,4,8,balanced,0.4477493365605672
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,4,8,balanced,0.5812000036239624
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,1,32,balanced,0.049973333875338234
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,1,32,balanced,0.05123200019200643
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,1,32,balanced,0.04978133241335551
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,1,32,balanced,0.05386666456858317
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,1,32,balanced,0.05594133337338766
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,1,32,balanced,0.06914133330186208
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,1,32,balanced,0.07042133311430614
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,1,32,balanced,0.07251200079917908
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,1,32,balanced,0.0708000014225642
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,1,32,balanced,0.0738506664832433
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,1,32,balanced,0.07256000240643819
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,1,32,balanced,0.07262933254241943
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,1,32,balanced,0.07568533221880595
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,1,32,balanced,0.07473599910736084
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,1,32,balanced,0.07913066446781158
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,1,32,balanced,0.08070399860541026
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,1,32,balanced,0.082997332016627
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,1,32,balanced,0.08830400307973225
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,1,32,balanced,0.0926026701927185
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,1,32,balanced,0.10532800356547038
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,1,32,balanced,0.11196266611417134
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,1,32,balanced,0.13799466689427695
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,1,32,balanced,0.1562933325767517
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,1,32,balanced,0.20787733793258667
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,1,32,balanced,0.24859732389450073
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,1,32,balanced,0.3449973265329997
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,1,32,balanced,0.4288746515909831
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,4,64,power_law_1.01,0.04131839871406555
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,4,64,power_law_1.01,0.03999359905719757
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,4,64,power_law_1.01,0.03999359905719757
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,4,64,power_law_1.01,0.042656001448631284
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,4,64,power_law_1.01,0.043424001336097716
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,4,64,power_law_1.01,0.04504320025444031
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,4,64,power_law_1.01,0.046003198623657225
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,4,64,power_law_1.01,0.04627839922904968
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,4,64,power_law_1.01,0.046239998936653134
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,4,64,power_law_1.01,0.047788798809051514
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,4,64,power_law_1.01,0.04738560020923614
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,4,64,power_law_1.01,0.04843519926071167
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,4,64,power_law_1.01,0.04891520142555237
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,4,64,power_law_1.01,0.05124480128288269
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,4,64,power_law_1.01,0.05617920160293579
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,4,64,power_law_1.01,0.056454402208328244
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,4,64,power_law_1.01,0.06115840077400207
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,4,64,power_law_1.01,0.0656063973903656
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,4,64,power_law_1.01,0.07239680290222168
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,4,64,power_law_1.01,0.08620799779891967
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,4,64,power_law_1.01,0.10188159942626954
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,4,64,power_law_1.01,0.1325376033782959
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,4,64,power_law_1.01,0.15493119955062867
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,4,64,power_law_1.01,0.211244797706604
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,4,64,power_law_1.01,0.26946558952331545
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,4,64,power_law_1.01,0.3761919975280762
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,4,64,power_law_1.01,0.4947391986846924
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,2,64,power_law_1.2,0.01822720021009445
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,2,64,power_law_1.2,0.017958399653434754
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,2,64,power_law_1.2,0.017983999848365784
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,2,64,power_law_1.2,0.018739199638366698
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,2,64,power_law_1.2,0.020735999941825865
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,2,64,power_law_1.2,0.02409600019454956
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,2,64,power_law_1.2,0.036051198840141296
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,2,64,power_law_1.2,0.036185601353645326
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,2,64,power_law_1.2,0.03332479894161224
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,2,64,power_law_1.2,0.03336319923400879
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,2,64,power_law_1.2,0.03336319923400879
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,2,64,power_law_1.2,0.033497598767280576
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,2,64,power_law_1.2,0.03359360098838806
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,2,64,power_law_1.2,0.03314560055732727
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,2,64,power_law_1.2,0.03307519853115082
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,2,64,power_law_1.2,0.03394559919834137
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,2,64,power_law_1.2,0.034431999921798705
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,2,64,power_law_1.2,0.03622399866580963
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,2,64,power_law_1.2,0.0379584014415741
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,2,64,power_law_1.2,0.046137601137161255
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,2,64,power_law_1.2,0.053286397457122804
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,2,64,power_law_1.2,0.06443520188331604
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,2,64,power_law_1.2,0.07697920203208923
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,2,64,power_law_1.2,0.10440959930419921
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,2,64,power_law_1.2,0.1336192011833191
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,2,64,power_law_1.2,0.17564799785614013
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,2,64,power_law_1.2,0.2378432035446167
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,4,2,power_law_1.01,0.018719999492168425
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,4,2,power_law_1.01,0.020294399559497835
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,4,2,power_law_1.01,0.02446720004081726
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,4,2,power_law_1.01,0.02910720109939575
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,4,2,power_law_1.01,0.03720319867134094
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,4,2,power_law_1.01,0.04428159892559051
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,4,2,power_law_1.01,0.05057280063629151
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,4,2,power_law_1.01,0.050444799661636355
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,4,2,power_law_1.01,0.05133439898490906
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,4,2,power_law_1.01,0.05441280007362366
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,4,2,power_law_1.01,0.055430400371551516
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,4,2,power_law_1.01,0.05629439949989319
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,4,2,power_law_1.01,0.05861120223999024
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,4,2,power_law_1.01,0.06106240153312683
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,4,2,power_law_1.01,0.06179839968681335
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,4,2,power_law_1.01,0.06408960223197938
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,4,2,power_law_1.01,0.07420799732208253
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,4,2,power_law_1.01,0.08386560082435608
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,4,2,power_law_1.01,0.08899199962615967
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,4,2,power_law_1.01,0.10622719526290894
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,4,2,power_law_1.01,0.10300159454345703
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,4,2,power_law_1.01,0.12872320413589478
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,4,2,power_law_1.01,0.159334397315979
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,4,2,power_law_1.01,0.19293440580368043
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,4,2,power_law_1.01,0.2296447992324829
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,4,2,power_law_1.01,0.2949631929397583
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,4,2,power_law_1.01,0.3763711929321289
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,4,32,power_law_1.01,0.050355201959609984
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,4,32,power_law_1.01,0.04693120121955872
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,4,32,power_law_1.01,0.04802559912204742
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,4,32,power_law_1.01,0.0436928004026413
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,4,32,power_law_1.01,0.044326400756835936
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,4,32,power_law_1.01,0.043680000305175784
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,4,32,power_law_1.01,0.045817598700523376
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,4,32,power_law_1.01,0.04656639993190766
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,4,32,power_law_1.01,0.04678399860858917
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,4,32,power_law_1.01,0.046758401393890384
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,4,32,power_law_1.01,0.04835839867591858
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,4,32,power_law_1.01,0.04927999973297119
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,4,32,power_law_1.01,0.049395200610160825
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,4,32,power_law_1.01,0.051948797702789304
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,4,32,power_law_1.01,0.055667197704315184
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,4,32,power_law_1.01,0.0574400007724762
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,4,32,power_law_1.01,0.06170240044593811
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,4,32,power_law_1.01,0.06984320282936096
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,4,32,power_law_1.01,0.07064319849014282
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,4,32,power_law_1.01,0.08280959725379944
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,4,32,power_law_1.01,0.09481599926948547
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,4,32,power_law_1.01,0.12348159551620483
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,4,32,power_law_1.01,0.15713920593261718
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,4,32,power_law_1.01,0.215231990814209
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,4,32,power_law_1.01,0.26316161155700685
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,4,32,power_law_1.01,0.359935998916626
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,4,32,power_law_1.01,0.4705471992492676
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,2,32,balanced,0.04364266494909922
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,2,32,balanced,0.04410133262475332
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,2,32,balanced,0.043866669138272606
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,2,32,balanced,0.0480320006608963
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,2,32,balanced,0.05634133517742157
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,2,32,balanced,0.06204266846179962
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,2,32,balanced,0.06071466704209646
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,2,32,balanced,0.06253333389759064
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,2,32,balanced,0.06401599943637848
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,2,32,balanced,0.06238399942715963
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,2,32,balanced,0.0646613339583079
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,2,32,balanced,0.06449600060780843
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,2,32,balanced,0.06238399942715963
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,2,32,balanced,0.06698666512966156
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,2,32,balanced,0.07025599976380666
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,2,32,balanced,0.07054399947325389
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,2,32,balanced,0.07321600119272868
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,2,32,balanced,0.07814933359622955
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,2,32,balanced,0.0807360013326009
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,2,32,balanced,0.09092266360918681
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,2,32,balanced,0.09921600421269734
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,2,32,balanced,0.11582400401433308
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,2,32,balanced,0.1361120045185089
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,2,32,balanced,0.18209065993626913
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,2,32,balanced,0.21701866388320923
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,2,32,balanced,0.29815999666849774
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,2,32,balanced,0.3755999803543091
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,8,32,power_law_1.2,0.04498560130596161
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,8,32,power_law_1.2,0.04594559967517853
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,8,32,power_law_1.2,0.045535999536514285
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,8,32,power_law_1.2,0.04449920058250427
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,8,32,power_law_1.2,0.04316799938678741
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,8,32,power_law_1.2,0.04350079894065857
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,8,32,power_law_1.2,0.044537600874900815
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,8,32,power_law_1.2,0.045798400044441225
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,8,32,power_law_1.2,0.046028798818588255
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,8,32,power_law_1.2,0.04542079865932465
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,8,32,power_law_1.2,0.04806399941444397
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,8,32,power_law_1.2,0.04881280064582825
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,8,32,power_law_1.2,0.049446401000022885
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,8,32,power_law_1.2,0.051507198810577394
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,8,32,power_law_1.2,0.05554559826850891
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,8,32,power_law_1.2,0.056569600105285646
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,8,32,power_law_1.2,0.0602944016456604
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,8,32,power_law_1.2,0.06747519969940186
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,8,32,power_law_1.2,0.07162240147590637
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,8,32,power_law_1.2,0.08464639782905578
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,8,32,power_law_1.2,0.09654399752616882
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,8,32,power_law_1.2,0.1251263976097107
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,8,32,power_law_1.2,0.16053119897842408
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,8,32,power_law_1.2,0.21756160259246826
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,8,32,power_law_1.2,0.286028790473938
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,8,32,power_law_1.2,0.4025792121887207
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,8,32,power_law_1.2,0.5109951972961426
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,1,32,power_law_1.01,0.06413440108299255
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,1,32,power_law_1.01,0.06289920210838318
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,1,32,power_law_1.01,0.06238080263137817
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,1,32,power_law_1.01,0.06632320284843445
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,1,32,power_law_1.01,0.06558719873428345
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,1,32,power_law_1.01,0.0650111973285675
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,1,32,power_law_1.01,0.06709120273590088
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,1,32,power_law_1.01,0.06913920044898987
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,1,32,power_law_1.01,0.0668287992477417
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,1,32,power_law_1.01,0.0661952018737793
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,1,32,power_law_1.01,0.06907520294189454
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,1,32,power_law_1.01,0.0719871997833252
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,1,32,power_law_1.01,0.07057920098304749
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,1,32,power_law_1.01,0.07211520075798035
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,1,32,power_law_1.01,0.07692800164222717
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,1,32,power_law_1.01,0.07875199913978577
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,1,32,power_law_1.01,0.08113920092582702
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,1,32,power_law_1.01,0.09042559862136841
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,1,32,power_law_1.01,0.09741439819335937
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,1,32,power_law_1.01,0.11945600509643554
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,1,32,power_law_1.01,0.13390719890594482
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,1,32,power_law_1.01,0.1659327983856201
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,1,32,power_law_1.01,0.21123840808868408
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,1,32,power_law_1.01,0.2671231985092163
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,1,32,power_law_1.01,0.3461375951766968
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,1,32,power_law_1.01,0.5100480079650879
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,1,32,power_law_1.01,0.6783103942871094
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,2,2,power_law_1.01,0.020185600221157073
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,2,2,power_law_1.01,0.024223999679088594
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,2,2,power_law_1.01,0.02968960106372833
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,2,2,power_law_1.01,0.04371840059757233
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,2,2,power_law_1.01,0.055052798986434934
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,2,2,power_law_1.01,0.061862397193908694
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,2,2,power_law_1.01,0.07219200134277344
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,2,2,power_law_1.01,0.07548800110816956
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,2,2,power_law_1.01,0.08106880187988282
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,2,2,power_law_1.01,0.08142719864845276
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,2,2,power_law_1.01,0.08693119883537292
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,2,2,power_law_1.01,0.08702719807624817
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,2,2,power_law_1.01,0.08638079762458802
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,2,2,power_law_1.01,0.08906239867210389
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,2,2,power_law_1.01,0.09315840005874634
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,2,2,power_law_1.01,0.09897599816322326
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,2,2,power_law_1.01,0.11382399797439575
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,2,2,power_law_1.01,0.12176640033721924
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,2,2,power_law_1.01,0.12081279754638671
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,2,2,power_law_1.01,0.146943998336792
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,2,2,power_law_1.01,0.15311360359191895
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,2,2,power_law_1.01,0.19228800535202026
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,2,2,power_law_1.01,0.21045119762420655
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,2,2,power_law_1.01,0.24574720859527588
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,2,2,power_law_1.01,0.29050240516662595
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,2,2,power_law_1.01,0.38260478973388673
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,2,2,power_law_1.01,0.47013120651245116
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,4,8,power_law_1.2,0.060678398609161376
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,4,8,power_law_1.2,0.06867200136184692
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,4,8,power_law_1.2,0.06677759885787964
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,4,8,power_law_1.2,0.07530879974365234
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,4,8,power_law_1.2,0.08036479949951172
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,4,8,power_law_1.2,0.08614400029182434
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,4,8,power_law_1.2,0.08095359802246094
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,4,8,power_law_1.2,0.07685760259628296
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,4,8,power_law_1.2,0.08193280100822449
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,4,8,power_law_1.2,0.08437119722366333
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,4,8,power_law_1.2,0.0827455997467041
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,4,8,power_law_1.2,0.08424959778785705
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,4,8,power_law_1.2,0.08831359744071961
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,4,8,power_law_1.2,0.08559359908103943
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,4,8,power_law_1.2,0.09186559915542603
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,4,8,power_law_1.2,0.09616000056266785
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,4,8,power_law_1.2,0.10003199577331542
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,4,8,power_law_1.2,0.10932480096817017
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,4,8,power_law_1.2,0.11847039461135864
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,4,8,power_law_1.2,0.13833600282669067
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,4,8,power_law_1.2,0.15788160562515258
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,4,8,power_law_1.2,0.20508160591125488
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,4,8,power_law_1.2,0.23394560813903809
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,4,8,power_law_1.2,0.31120638847351073
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,4,8,power_law_1.2,0.38810880184173585
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,4,8,power_law_1.2,0.6165247917175293
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,4,8,power_law_1.2,0.7339647769927978
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,8,2,power_law_1.01,0.018169599771499633
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,8,2,power_law_1.01,0.018534399569034576
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,8,2,power_law_1.01,0.020025600492954255
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,8,2,power_law_1.01,0.023366400599479677
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,8,2,power_law_1.01,0.02781440019607544
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,8,2,power_law_1.01,0.031590399146080014
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,8,2,power_law_1.01,0.03467519879341126
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,8,2,power_law_1.01,0.0381632000207901
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,8,2,power_law_1.01,0.0406464010477066
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,8,2,power_law_1.01,0.041388800740242
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,8,2,power_law_1.01,0.04344319999217987
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,8,2,power_law_1.01,0.045433598756790164
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,8,2,power_law_1.01,0.04677119851112366
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,8,2,power_law_1.01,0.05063679814338684
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,8,2,power_law_1.01,0.050323200225830075
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,8,2,power_law_1.01,0.050323200225830075
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,8,2,power_law_1.01,0.055276799201965335
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,8,2,power_law_1.01,0.0638592004776001
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,8,2,power_law_1.01,0.06855679750442505
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,8,2,power_law_1.01,0.11427199840545654
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,8,2,power_law_1.01,0.12482559680938721
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,8,2,power_law_1.01,0.1257024049758911
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,8,2,power_law_1.01,0.138099205493927
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,8,2,power_law_1.01,0.16270079612731933
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,8,2,power_law_1.01,0.1971392035484314
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,8,2,power_law_1.01,0.2644160032272339
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,8,2,power_law_1.01,0.33255040645599365
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,8,1,power_law_1.2,0.07762560248374939
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,8,1,power_law_1.2,0.08081279993057251
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,8,1,power_law_1.2,0.08446080088615418
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,8,1,power_law_1.2,0.10230400562286376
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,8,1,power_law_1.2,0.11455999612808228
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,8,1,power_law_1.2,0.13110400438308717
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,8,1,power_law_1.2,0.1558527946472168
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,8,1,power_law_1.2,0.16323839426040648
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,8,1,power_law_1.2,0.1668287992477417
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,8,1,power_law_1.2,0.169868803024292
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,8,1,power_law_1.2,0.17620480060577393
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,8,1,power_law_1.2,0.1856063961982727
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,8,1,power_law_1.2,0.18539520502090454
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,8,1,power_law_1.2,0.19262720346450807
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,8,1,power_law_1.2,0.20262401103973388
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,8,1,power_law_1.2,0.20772480964660645
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,8,1,power_law_1.2,0.22765440940856935
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,8,1,power_law_1.2,0.25973761081695557
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,8,1,power_law_1.2,0.28582398891448973
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,8,1,power_law_1.2,0.3541951894760132
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,8,1,power_law_1.2,0.41665921211242674
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,8,1,power_law_1.2,0.5470911979675293
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,8,1,power_law_1.2,0.6763391971588135
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,8,1,power_law_1.2,0.9355263710021973
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,8,1,power_law_1.2,1.1956159591674804
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,8,1,power_law_1.2,1.7147775650024415
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,8,1,power_law_1.2,2.256339263916016
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,2,1,balanced,0.05246399839719137
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,2,1,balanced,0.06041066845258077
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,2,1,balanced,0.0794293334086736
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,2,1,balanced,0.11642666657765706
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,2,1,balanced,0.18252267440160116
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,2,1,balanced,0.3062933286031087
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,2,1,balanced,0.3115466634432475
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,2,1,balanced,0.311082661151886
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,2,1,balanced,0.31461334228515625
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,2,1,balanced,0.31691733996073407
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,2,1,balanced,0.31939200560251874
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,2,1,balanced,0.32614399989446
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,2,1,balanced,0.3288853367169698
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,2,1,balanced,0.3325120011965434
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,2,1,balanced,0.34044798215230304
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,2,1,balanced,0.3468799988428752
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,2,1,balanced,0.3594826857248942
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,2,1,balanced,0.38837865988413495
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,2,1,balanced,0.4097813367843628
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,2,1,balanced,0.4639253218968709
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,2,1,balanced,0.5194133520126343
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,2,1,balanced,0.6516693433125814
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,2,1,balanced,0.75381867090861
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,2,1,balanced,1.0504480202992756
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,2,1,balanced,1.2465226650238037
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,2,1,balanced,1.8111359278361003
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,2,1,balanced,2.2605279286702475
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,4,8,balanced,0.02103466788927714
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,4,8,balanced,0.01961600035429001
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,4,8,balanced,0.02124800036350886
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,4,8,balanced,0.021482666333516438
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,4,8,balanced,0.023317334552605946
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,4,8,balanced,0.027514666318893433
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,4,8,balanced,0.02956799914439519
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,4,8,balanced,0.029792000850041706
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,4,8,balanced,0.031701333820819855
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,4,8,balanced,0.03363200028737386
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,4,8,balanced,0.03385599950949351
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,4,8,balanced,0.035071998834609985
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,4,8,balanced,0.036015999813874565
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,4,8,balanced,0.03789866715669632
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,4,8,balanced,0.0402453343073527
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,4,8,balanced,0.04142933338880539
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,4,8,balanced,0.05011733373006185
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,4,8,balanced,0.053210665782292686
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,4,8,balanced,0.07817066709200542
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,4,8,balanced,0.09737066427866618
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,4,8,balanced,0.09226666887601216
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,4,8,balanced,0.10659199953079224
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,4,8,balanced,0.12283200025558472
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,4,8,balanced,0.1471946636835734
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,4,8,balanced,0.1755626598993937
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,4,8,balanced,0.2643146713574727
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,4,8,balanced,0.3163839975992839
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,16,1,power_law_1.2,0.053651201725006106
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,16,1,power_law_1.2,0.05542399883270264
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,16,1,power_law_1.2,0.05820159912109375
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,16,1,power_law_1.2,0.06528639793395996
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,16,1,power_law_1.2,0.07560319900512695
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,16,1,power_law_1.2,0.08451200127601624
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,16,1,power_law_1.2,0.10094079971313477
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,16,1,power_law_1.2,0.1060863971710205
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,16,1,power_law_1.2,0.10857599973678589
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,16,1,power_law_1.2,0.11039999723434449
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,16,1,power_law_1.2,0.11637120246887207
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,16,1,power_law_1.2,0.1263167977333069
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,16,1,power_law_1.2,0.13058559894561766
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,16,1,power_law_1.2,0.13734400272369385
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,16,1,power_law_1.2,0.14813439846038817
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,16,1,power_law_1.2,0.1583680033683777
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,16,1,power_law_1.2,0.15964159965515137
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,16,1,power_law_1.2,0.19251840114593505
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,16,1,power_law_1.2,0.212774395942688
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,16,1,power_law_1.2,0.27070720195770265
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,16,1,power_law_1.2,0.3264575958251953
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,16,1,power_law_1.2,0.4298111915588379
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,16,1,power_law_1.2,0.5378111839294434
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,16,1,power_law_1.2,0.7391808032989502
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,16,1,power_law_1.2,0.950169563293457
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,16,1,power_law_1.2,1.359769630432129
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,16,1,power_law_1.2,1.7724927902221679
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,16,16,balanced,0.03982933362325033
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,16,16,balanced,0.04011733333269755
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,16,16,balanced,0.03805333375930786
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,16,16,balanced,0.03957333415746689
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,16,16,balanced,0.04140799989302953
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,16,16,balanced,0.04422933359940847
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,16,16,balanced,0.04417600234349569
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,16,16,balanced,0.04618666569391886
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,16,16,balanced,0.04641066491603851
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,16,16,balanced,0.04587733248869578
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,16,16,balanced,0.04658666749795278
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,16,16,balanced,0.04853333532810211
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,16,16,balanced,0.048138668139775596
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,16,16,balanced,0.04882133503754934
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,16,16,balanced,0.05416533350944519
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,16,16,balanced,0.05418133238951365
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,16,16,balanced,0.056549335519472756
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,16,16,balanced,0.06113066772619883
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,16,16,balanced,0.06483733157316844
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,16,16,balanced,0.07301866511503856
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,16,16,balanced,0.08144000172615051
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,16,16,balanced,0.09770133097966512
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,16,16,balanced,0.11693867047627766
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,16,16,balanced,0.15948800245920816
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,16,16,balanced,0.18936532735824585
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,16,16,balanced,0.25600000222524005
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,16,16,balanced,0.32258133093516034
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,4,32,power_law_1.01,0.05989120006561279
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,4,32,power_law_1.01,0.04812160134315491
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,4,32,power_law_1.01,0.04645119905471802
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,4,32,power_law_1.01,0.049235200881958006
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,4,32,power_law_1.01,0.04979200065135956
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,4,32,power_law_1.01,0.04906879961490631
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,4,32,power_law_1.01,0.05009920001029968
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,4,32,power_law_1.01,0.05095679759979248
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,4,32,power_law_1.01,0.05076479911804199
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,4,32,power_law_1.01,0.050496000051498416
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,4,32,power_law_1.01,0.0522816002368927
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,4,32,power_law_1.01,0.05245440006256104
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,4,32,power_law_1.01,0.053472000360488894
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,4,32,power_law_1.01,0.05500800013542175
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,4,32,power_law_1.01,0.05866879820823669
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,4,32,power_law_1.01,0.06028159856796265
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,4,32,power_law_1.01,0.06266239881515503
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,4,32,power_law_1.01,0.0693120002746582
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,4,32,power_law_1.01,0.0737600028514862
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,4,32,power_law_1.01,0.08772479891777038
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,4,32,power_law_1.01,0.09475200176239014
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,4,32,power_law_1.01,0.12189439535140992
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,4,32,power_law_1.01,0.13836159706115722
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,4,32,power_law_1.01,0.1867583990097046
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,4,32,power_law_1.01,0.23969919681549073
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,4,32,power_law_1.01,0.3293503999710083
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,4,32,power_law_1.01,0.43685121536254884
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,8,32,power_law_1.01,0.05502079725265503
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,8,32,power_law_1.01,0.05364480018615723
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,8,32,power_law_1.01,0.0490880012512207
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,8,32,power_law_1.01,0.05030400156974792
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,8,32,power_law_1.01,0.050393599271774295
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,8,32,power_law_1.01,0.04896639883518219
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,8,32,power_law_1.01,0.0506496012210846
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,8,32,power_law_1.01,0.05087360143661499
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,8,32,power_law_1.01,0.05118079781532288
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,8,32,power_law_1.01,0.0514240026473999
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,8,32,power_law_1.01,0.052832001447677614
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,8,32,power_law_1.01,0.05317760109901428
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,8,32,power_law_1.01,0.05415679812431336
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,8,32,power_law_1.01,0.054745602607727054
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,8,32,power_law_1.01,0.0591871976852417
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,8,32,power_law_1.01,0.06047999858856201
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,8,32,power_law_1.01,0.06410880088806152
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,8,32,power_law_1.01,0.07100800275802613
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,8,32,power_law_1.01,0.07370240092277527
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,8,32,power_law_1.01,0.08823680281639099
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,8,32,power_law_1.01,0.09825279712677001
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,8,32,power_law_1.01,0.11894400119781494
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,8,32,power_law_1.01,0.13872640132904052
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,8,32,power_law_1.01,0.19154560565948486
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,8,32,power_law_1.01,0.2252863883972168
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,8,32,power_law_1.01,0.32422399520874023
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,8,32,power_law_1.01,0.41417598724365234
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,4,2,power_law_1.01,0.07682560086250305
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,4,2,power_law_1.01,0.08146560192108154
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,4,2,power_law_1.01,0.10008959770202637
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,4,2,power_law_1.01,0.11269760131835938
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,4,2,power_law_1.01,0.12009600400924683
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,4,2,power_law_1.01,0.13444479703903198
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,4,2,power_law_1.01,0.1499392032623291
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,4,2,power_law_1.01,0.15649919509887694
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,4,2,power_law_1.01,0.15713920593261718
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,4,2,power_law_1.01,0.15916160345077515
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,4,2,power_law_1.01,0.163264000415802
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,4,2,power_law_1.01,0.16547839641571044
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,4,2,power_law_1.01,0.16856319904327394
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,4,2,power_law_1.01,0.17248640060424805
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,4,2,power_law_1.01,0.1769215941429138
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,4,2,power_law_1.01,0.17749119997024537
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,4,32,balanced,0.01940800001223882
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,4,32,balanced,0.019296000401178997
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,4,32,balanced,0.019445333629846573
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,4,2,power_law_1.01,0.18598400354385375
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,4,32,balanced,0.019440000255902607
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,4,32,balanced,0.01916266605257988
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,4,32,balanced,0.021589333812395733
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,4,32,balanced,0.02537599951028824
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,4,32,balanced,0.023936000963052113
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,4,2,power_law_1.01,0.20441598892211915
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,4,32,balanced,0.03766400118668874
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,4,32,balanced,0.03787733366092046
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,4,32,balanced,0.03775466730197271
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,4,32,balanced,0.03754133234421412
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,4,32,balanced,0.035642666121323906
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,4,2,power_law_1.01,0.21976959705352783
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,4,32,balanced,0.03522666543722153
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,4,32,balanced,0.0399893323580424
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,4,32,balanced,0.04243200023969015
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,4,32,balanced,0.039520000418027244
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,4,32,balanced,0.04428799947102865
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,4,32,balanced,0.04587733248869578
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,4,32,balanced,0.0554613322019577
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,4,32,balanced,0.06402666866779327
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,4,32,balanced,0.08193066716194153
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,4,32,balanced,0.09731200337409973
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,4,32,balanced,0.12985600034395853
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,4,32,balanced,0.15901333093643188
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,4,32,balanced,0.22497065862019858
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,4,32,balanced,0.2853013277053833
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,4,2,power_law_1.01,0.2571455955505371
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,4,2,power_law_1.01,0.2988800048828125
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,4,2,power_law_1.01,0.37240960597991946
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,4,2,power_law_1.01,0.45011200904846194
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,4,2,power_law_1.01,0.6272575855255127
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,4,2,power_law_1.01,0.7807424068450928
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,4,2,power_law_1.01,1.1456064224243163
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,4,2,power_law_1.01,1.4330880165100097
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,8,2,power_law_1.2,0.01889919936656952
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,8,2,power_law_1.2,0.01889919936656952
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,8,2,power_law_1.2,0.02072319984436035
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,8,2,power_law_1.2,0.023801599442958832
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,8,2,power_law_1.2,0.02879360020160675
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,8,2,power_law_1.2,0.031251201033592226
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,8,2,power_law_1.2,0.03644160032272339
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,8,2,power_law_1.2,0.03803519904613495
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,8,2,power_law_1.2,0.03925760090351105
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,8,2,power_law_1.2,0.04085119962692261
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,8,2,power_law_1.2,0.04508799910545349
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,8,2,power_law_1.2,0.045407998561859134
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,8,2,power_law_1.2,0.04670720100402832
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,8,2,power_law_1.2,0.05050240159034729
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,8,2,power_law_1.2,0.04963200092315674
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,8,2,power_law_1.2,0.05146239995956421
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,8,2,power_law_1.2,0.056492799520492555
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,8,2,power_law_1.2,0.06498559713363647
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,8,2,power_law_1.2,0.07139840126037597
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,8,2,power_law_1.2,0.1171455979347229
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,8,2,power_law_1.2,0.1276927947998047
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,8,2,power_law_1.2,0.12951040267944336
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,8,2,power_law_1.2,0.1434623956680298
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,8,2,power_law_1.2,0.17571840286254883
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,8,2,power_law_1.2,0.21082239151000975
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,8,2,power_law_1.2,0.27292160987854003
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,8,2,power_law_1.2,0.33320960998535154
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,2,2,power_law_1.01,0.07779840230941773
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,2,2,power_law_1.01,0.09059200286865235
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,2,2,power_law_1.01,0.10733439922332763
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,2,2,power_law_1.01,0.1344383955001831
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,2,2,power_law_1.01,0.155840003490448
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,2,2,power_law_1.01,0.17344640493392943
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,2,2,power_law_1.01,0.19422719478607178
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,2,2,power_law_1.01,0.20541439056396485
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,2,2,power_law_1.01,0.2092288017272949
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,2,2,power_law_1.01,0.21561601161956787
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,2,2,power_law_1.01,0.22207999229431152
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,2,2,power_law_1.01,0.2244096040725708
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,2,2,power_law_1.01,0.2215359926223755
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,2,2,power_law_1.01,0.23126399517059326
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,2,2,power_law_1.01,0.23386240005493164
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,2,2,power_law_1.01,0.24129281044006348
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,2,2,power_law_1.01,0.24547200202941893
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,2,2,power_law_1.01,0.2607872009277344
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,2,2,power_law_1.01,0.2852288007736206
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,2,2,power_law_1.01,0.3263168096542358
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,2,2,power_law_1.01,0.3796031951904297
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,2,2,power_law_1.01,0.4816319942474365
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,2,2,power_law_1.01,0.5447743892669678
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,2,2,power_law_1.01,0.7416063785552979
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,2,2,power_law_1.01,0.9441791534423828
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,2,2,power_law_1.01,1.2499967575073243
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,2,2,power_law_1.01,1.6373952865600585
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,4,8,power_law_1.01,0.046105599403381346
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,4,8,power_law_1.01,0.05516160130500793
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,4,8,power_law_1.01,0.060192000865936277
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,4,8,power_law_1.01,0.06677759885787964
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,4,8,power_law_1.01,0.07249280214309692
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,4,8,power_law_1.01,0.073471999168396
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,4,8,power_law_1.01,0.07582079768180847
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,4,8,power_law_1.01,0.07378559708595275
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,4,8,power_law_1.01,0.07397119998931885
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,4,8,power_law_1.01,0.07331200242042542
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,4,8,power_law_1.01,0.07592319846153259
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,4,8,power_law_1.01,0.0776639997959137
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,4,8,power_law_1.01,0.07734400033950806
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,4,8,power_law_1.01,0.07807360291481018
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,4,8,power_law_1.01,0.08802559971809387
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,4,8,power_law_1.01,0.09048960208892823
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,4,8,power_law_1.01,0.09317759871482849
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,4,8,power_law_1.01,0.10165120363235473
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,4,8,power_law_1.01,0.10508160591125489
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,4,8,power_law_1.01,0.11983360052108764
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,4,8,power_law_1.01,0.135808002948761
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,4,8,power_law_1.01,0.16813440322875978
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,4,8,power_law_1.01,0.19705599546432495
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,4,8,power_law_1.01,0.251526403427124
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,4,8,power_law_1.01,0.29706881046295164
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,4,8,power_law_1.01,0.4330239772796631
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,4,8,power_law_1.01,0.545420789718628
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,1,16,power_law_1.01,0.06906239986419678
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,1,16,power_law_1.01,0.09097599983215332
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,1,16,power_law_1.01,0.08852480053901672
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,1,16,power_law_1.01,0.09200000166893005
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,1,16,power_law_1.01,0.09189760088920593
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,1,16,power_law_1.01,0.0831167995929718
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,1,16,power_law_1.01,0.0920960009098053
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,1,16,power_law_1.01,0.0929472029209137
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,1,16,power_law_1.01,0.08941439986228943
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,1,16,power_law_1.01,0.09160959720611572
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,1,16,power_law_1.01,0.09387519955635071
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,1,16,power_law_1.01,0.09354879856109619
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,1,16,power_law_1.01,0.09473279714584351
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,1,16,power_law_1.01,0.09708160161972046
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,1,16,power_law_1.01,0.10341759920120239
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,1,16,power_law_1.01,0.10323200225830079
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,1,16,power_law_1.01,0.1074112057685852
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,1,16,power_law_1.01,0.11404800415039062
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,1,16,power_law_1.01,0.12097280025482178
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,1,16,power_law_1.01,0.1411072015762329
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,1,16,power_law_1.01,0.16650880575180055
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,1,16,power_law_1.01,0.2069375991821289
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,1,16,power_law_1.01,0.2364799976348877
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,1,16,power_law_1.01,0.31284480094909667
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,1,16,power_law_1.01,0.43561601638793945
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,1,16,power_law_1.01,0.5554495811462402
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,1,16,power_law_1.01,0.7376448154449463
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,2,2,power_law_1.2,0.08893439769744874
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,2,2,power_law_1.2,0.10373120307922364
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,2,2,power_law_1.2,0.11771520376205444
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,2,2,power_law_1.2,0.13184640407562256
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,2,2,power_law_1.2,0.14700160026550294
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,2,2,power_law_1.2,0.1636031985282898
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,2,2,power_law_1.2,0.19705599546432495
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,2,2,power_law_1.2,0.2076159954071045
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,2,2,power_law_1.2,0.20591359138488768
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,2,2,power_law_1.2,0.20905599594116211
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,2,2,power_law_1.2,0.21671679019927978
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,2,2,power_law_1.2,0.22506239414215087
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,2,2,power_law_1.2,0.23352959156036376
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,2,2,power_law_1.2,0.24031360149383546
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,2,2,power_law_1.2,0.24416639804840087
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,2,2,power_law_1.2,0.24715518951416016
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,2,2,power_law_1.2,0.2539263963699341
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,2,2,power_law_1.2,0.27311999797821046
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,2,2,power_law_1.2,0.29908480644226076
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,2,2,power_law_1.2,0.3447103977203369
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,2,2,power_law_1.2,0.3938368082046509
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,2,2,power_law_1.2,0.49185919761657715
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,2,2,power_law_1.2,0.5853312015533447
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,2,2,power_law_1.2,0.8164992332458496
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,2,2,power_law_1.2,1.0037376403808593
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,2,2,power_law_1.2,1.471174430847168
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,2,2,power_law_1.2,1.878169631958008
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,4,4,balanced,0.03976000100374222
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,4,4,balanced,0.04005866746107737
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,4,4,balanced,0.04205866654713949
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,4,4,balanced,0.04804799954096476
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,4,4,balanced,0.06247466802597046
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,4,4,balanced,0.08463999629020691
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,4,4,balanced,0.0872320036093394
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,4,4,balanced,0.09044266740481059
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,4,4,balanced,0.08988266189893086
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,4,4,balanced,0.09029866258303325
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,4,4,balanced,0.09035199880599976
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,4,4,balanced,0.09300800164540608
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,4,4,balanced,0.0925546685854594
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,4,4,balanced,0.09593066573143005
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,4,4,balanced,0.10245866576830547
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,4,4,balanced,0.10502933462460835
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,4,4,balanced,0.11180800199508667
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,4,4,balanced,0.12468266487121582
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,4,4,balanced,0.1280693312486013
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,4,4,balanced,0.15363199512163797
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,4,4,balanced,0.16710400581359863
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,4,4,balanced,0.21356266736984253
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,4,4,balanced,0.24665067593256632
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,4,4,balanced,0.3413493235905965
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,4,4,balanced,0.4185813268025716
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,4,4,balanced,0.6067946751912435
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,4,4,balanced,0.7712053457895914
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,2,128,power_law_1.01,0.04948480129241943
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,2,128,power_law_1.01,0.05842559933662415
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,2,128,power_law_1.01,0.04348799884319306
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,2,128,power_law_1.01,0.04437119960784912
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,2,128,power_law_1.01,0.04438399970531463
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,2,128,power_law_1.01,0.04471679925918579
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,2,128,power_law_1.01,0.045459198951721194
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,2,128,power_law_1.01,0.04378879964351654
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,2,128,power_law_1.01,0.043628799915313723
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,2,128,power_law_1.01,0.04416640102863312
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,2,128,power_law_1.01,0.04428800046443939
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,2,128,power_law_1.01,0.045638400316238406
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,2,128,power_law_1.01,0.045952001214027406
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,2,128,power_law_1.01,0.047167998552322385
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,1,64,power_law_1.2,0.0871295988559723
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,2,128,power_law_1.01,0.05121279954910278
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,1,64,power_law_1.2,0.05911039710044861
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,2,128,power_law_1.01,0.05173119902610779
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,2,128,power_law_1.01,0.05478399991989136
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,1,64,power_law_1.2,0.05307520031929016
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,2,128,power_law_1.01,0.06270080208778381
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,1,64,power_law_1.2,0.05742719769477844
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,2,128,power_law_1.01,0.06481279730796814
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,2,128,power_law_1.01,0.07559679746627808
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,1,64,power_law_1.2,0.052262401580810545
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,2,128,power_law_1.01,0.08652799725532531
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,1,64,power_law_1.2,0.05206400156021118
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,2,128,power_law_1.01,0.11029119491577148
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,2,128,power_law_1.01,0.11345920562744141
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,1,64,power_law_1.2,0.0597055971622467
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,2,128,power_law_1.01,0.1577023983001709
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,1,64,power_law_1.2,0.05783039927482605
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,2,128,power_law_1.01,0.19127680063247682
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,1,64,power_law_1.2,0.05549439787864685
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,2,128,power_law_1.01,0.2657344102859497
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,1,64,power_law_1.2,0.05944960117340088
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,2,128,power_law_1.01,0.331712007522583
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,1,64,power_law_1.2,0.06032639741897583
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,1,64,power_law_1.2,0.06561279892921448
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,1,64,power_law_1.2,0.06725119948387145
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,1,64,power_law_1.2,0.06490240097045899
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,1,64,power_law_1.2,0.07064319849014282
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,1,64,power_law_1.2,0.07338879704475403
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,1,64,power_law_1.2,0.07480319738388061
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,1,64,power_law_1.2,0.08405759930610657
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,1,64,power_law_1.2,0.08826239705085755
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,1,64,power_law_1.2,0.10509439706802368
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,1,64,power_law_1.2,0.10994559526443481
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,1,64,power_law_1.2,0.14005119800567628
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,1,64,power_law_1.2,0.1657088041305542
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,1,64,power_law_1.2,0.20762240886688232
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,1,64,power_law_1.2,0.27560958862304685
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,1,64,power_law_1.2,0.3963776111602783
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,1,64,power_law_1.2,0.590822410583496
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,4,64,balanced,0.01953599974513054
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,4,64,balanced,0.019296000401178997
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,4,64,balanced,0.019551999866962433
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,4,64,balanced,0.020661332954963047
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,4,64,balanced,0.02072000006834666
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,4,64,balanced,0.021520001192887623
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,4,64,balanced,0.03601066768169403
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,4,64,balanced,0.03573333223660787
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,4,64,balanced,0.03089066594839096
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,4,64,balanced,0.03143999973932902
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,4,64,balanced,0.03149333347876867
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,4,64,balanced,0.031845333675543465
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,4,64,balanced,0.03150933235883713
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,4,64,balanced,0.03151999910672506
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,4,64,balanced,0.03585600107908249
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,4,64,balanced,0.03611200054486593
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,4,64,balanced,0.035504000882307686
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,4,64,balanced,0.03958400090535482
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,4,64,balanced,0.043365334471066795
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,4,64,balanced,0.04660266637802124
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,4,64,balanced,0.06038933495680491
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,4,64,balanced,0.0740533322095871
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,4,64,balanced,0.09130133191744487
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,4,64,balanced,0.12178132931391399
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,4,64,balanced,0.1546986699104309
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,4,64,balanced,0.21464000145594278
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,4,64,balanced,0.27748266855875653
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,1,256,balanced,0.03982399900754293
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,1,256,balanced,0.040821333726247154
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,1,256,balanced,0.03982399900754293
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,1,256,balanced,0.04279999931653341
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,1,256,balanced,0.04387199878692627
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,1,256,balanced,0.04322133461634318
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,1,256,balanced,0.04590400060017904
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,1,256,balanced,0.047744000951449074
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,1,256,balanced,0.045935998360315956
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,1,256,balanced,0.0499839981396993
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,1,256,balanced,0.05195199946562449
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,1,256,balanced,0.05272000034650167
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,1,256,balanced,0.04952000081539154
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,1,256,balanced,0.05379733443260193
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,1,256,balanced,0.058245331048965454
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,1,256,balanced,0.05834666887919108
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,1,256,balanced,0.06264000137646993
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,1,256,balanced,0.06658666829268138
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,1,256,balanced,0.0724373310804367
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,1,256,balanced,0.08401067058245341
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,1,256,balanced,0.08922666311264038
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,1,256,balanced,0.11556266744931538
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,1,256,balanced,0.13165866335233053
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,1,256,balanced,0.1693920095761617
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,1,256,balanced,0.2097866733868917
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,1,256,balanced,0.2903040051460266
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,1,256,balanced,0.37045331796010333
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,1,16,power_law_1.01,0.061913597583770755
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,1,16,power_law_1.01,0.07601280212402343
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,1,16,power_law_1.01,0.07411199808120728
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,1,16,power_law_1.01,0.07711359858512878
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,1,16,power_law_1.01,0.07778559923171997
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,1,16,power_law_1.01,0.07468159794807434
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,1,16,power_law_1.01,0.07671040296554565
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,1,16,power_law_1.01,0.07373440265655518
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,1,16,power_law_1.01,0.07917439937591553
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,1,16,power_law_1.01,0.07563520073890687
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,1,16,power_law_1.01,0.07756159901618957
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,1,16,power_law_1.01,0.08213760256767273
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,1,16,power_law_1.01,0.08095359802246094
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,1,16,power_law_1.01,0.08257920145988465
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,1,16,power_law_1.01,0.09047039747238159
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,1,16,power_law_1.01,0.09335039854049683
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,1,16,power_law_1.01,0.09584640264511109
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,1,16,power_law_1.01,0.1087615966796875
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,1,16,power_law_1.01,0.11278719902038574
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,1,16,power_law_1.01,0.1362239956855774
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,1,16,power_law_1.01,0.14910080432891845
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,1,16,power_law_1.01,0.18096640110015869
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,1,16,power_law_1.01,0.2189311981201172
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,1,16,power_law_1.01,0.2911423921585083
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,1,16,power_law_1.01,0.35969278812408445
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,1,16,power_law_1.01,0.516377592086792
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,1,16,power_law_1.01,0.6304255962371826
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,8,8,power_law_1.2,0.04668799936771393
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,8,8,power_law_1.2,0.049619200825691226
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,8,8,power_law_1.2,0.048019200563430786
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,8,8,power_law_1.2,0.049030399322509764
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,8,8,power_law_1.2,0.05246719717979431
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,8,8,power_law_1.2,0.049465599656105044
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,8,8,power_law_1.2,0.05310080051422119
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,8,8,power_law_1.2,0.05368319749832153
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,8,8,power_law_1.2,0.05234559774398804
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,8,8,power_law_1.2,0.05317760109901428
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,8,8,power_law_1.2,0.05545600056648255
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,8,8,power_law_1.2,0.05785599946975708
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,8,8,power_law_1.2,0.0592960000038147
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,8,8,power_law_1.2,0.06344959735870362
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,8,8,power_law_1.2,0.07100160121917724
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,8,8,power_law_1.2,0.07496960163116455
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,8,8,power_law_1.2,0.07308160066604615
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,8,8,power_law_1.2,0.08401920199394226
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,8,8,power_law_1.2,0.0913536012172699
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,8,8,power_law_1.2,0.11328639984130859
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,8,8,power_law_1.2,0.1345471978187561
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,8,8,power_law_1.2,0.17601920366287233
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,8,8,power_law_1.2,0.204966402053833
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,8,8,power_law_1.2,0.28847360610961914
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,8,8,power_law_1.2,0.37484800815582275
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,8,8,power_law_1.2,0.540556812286377
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,8,8,power_law_1.2,0.6106815814971924
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,4,8,power_law_1.01,0.05398399829864502
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,4,8,power_law_1.01,0.06282240152359009
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,4,8,power_law_1.01,0.061324799060821535
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,4,8,power_law_1.01,0.07125759720802308
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,4,8,power_law_1.01,0.07680000066757202
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,4,8,power_law_1.01,0.07449600100517273
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,4,8,power_law_1.01,0.07578880190849305
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,4,8,power_law_1.01,0.07647359967231751
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,4,8,power_law_1.01,0.07845759987831116
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,4,8,power_law_1.01,0.08001279830932617
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,4,8,power_law_1.01,0.08110079765319825
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,4,8,power_law_1.01,0.08284800052642823
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,4,8,power_law_1.01,0.08479359745979309
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,4,8,power_law_1.01,0.08520320057868958
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,4,8,power_law_1.01,0.08837119936943054
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,4,8,power_law_1.01,0.09004799723625183
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,4,8,power_law_1.01,0.09236479997634887
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,4,8,power_law_1.01,0.10163840055465698
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,4,8,power_law_1.01,0.11066880226135253
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,4,8,power_law_1.01,0.135315203666687
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,4,8,power_law_1.01,0.15366400480270387
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,4,8,power_law_1.01,0.18163199424743653
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,2,1,power_law_1.01,0.0898688018321991
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,4,8,power_law_1.01,0.23011200428009032
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,2,1,power_law_1.01,0.10291839838027954
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,4,8,power_law_1.01,0.3155328035354614
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,2,1,power_law_1.01,0.12896000146865844
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,4,8,power_law_1.01,0.3981184005737305
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,2,1,power_law_1.01,0.17993600368499757
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,4,8,power_law_1.01,0.5869503974914551
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,2,1,power_law_1.01,0.22456319332122804
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,4,8,power_law_1.01,0.7602496147155762
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,2,1,power_law_1.01,0.26206719875335693
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,2,1,power_law_1.01,0.3266304016113281
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,2,1,power_law_1.01,0.341644811630249
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,2,1,power_law_1.01,0.3502336025238037
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,2,1,power_law_1.01,0.35888640880584716
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,2,1,power_law_1.01,0.3692352056503296
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,2,1,power_law_1.01,0.37141120433807373
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,2,1,power_law_1.01,0.37852799892425537
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,2,1,power_law_1.01,0.37944319248199465
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,2,1,power_law_1.01,0.39297919273376464
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,2,1,power_law_1.01,0.3951296091079712
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,2,1,power_law_1.01,0.4116223812103271
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,2,1,power_law_1.01,0.45549440383911133
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,2,1,power_law_1.01,0.48801279067993164
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,2,1,power_law_1.01,0.5696512222290039
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,2,1,power_law_1.01,0.6511616230010986
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,2,1,power_law_1.01,0.8073856353759765
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,2,1,power_law_1.01,0.9668095588684082
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,2,1,power_law_1.01,1.2773951530456542
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,2,1,power_law_1.01,1.571833610534668
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,2,1,power_law_1.01,2.1819520950317384
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,2,1,power_law_1.01,2.782636833190918
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,2,32,balanced,0.04167466859022776
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,2,32,balanced,0.041850666205088295
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,2,32,balanced,0.041690667470296226
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,2,32,balanced,0.04383466641108195
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,2,32,balanced,0.0476746658484141
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,2,32,balanced,0.0480373352766037
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,2,32,balanced,0.04983466863632202
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,2,32,balanced,0.049914668003718056
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,2,32,balanced,0.04996799925963084
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,2,32,balanced,0.04971200227737427
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,2,32,balanced,0.051728000243504844
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,2,32,balanced,0.051776001850763954
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,2,32,balanced,0.051813334226608276
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,2,32,balanced,0.05381333331267039
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,2,32,balanced,0.0561706672112147
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,2,32,balanced,0.05789866546789805
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,2,32,balanced,0.06035199761390686
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,2,32,balanced,0.06597866614659627
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,2,32,balanced,0.06819200019041698
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,2,32,balanced,0.0807360013326009
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,2,32,balanced,0.09511466821034749
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,2,32,balanced,0.12075199683507283
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,2,32,balanced,0.1441973348458608
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,2,32,balanced,0.19460266828536987
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,2,32,balanced,0.23175466060638428
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,2,32,balanced,0.317248006661733
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,2,32,balanced,0.4076053301493327
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,1,8,power_law_1.01,0.06130560040473938
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,1,8,power_law_1.01,0.07893760204315185
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,1,8,power_law_1.01,0.08589439988136291
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,1,8,power_law_1.01,0.08899199962615967
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,1,8,power_law_1.01,0.0976960003376007
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,1,8,power_law_1.01,0.09411200284957885
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,1,8,power_law_1.01,0.10183680057525635
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,1,8,power_law_1.01,0.10435199737548828
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,1,8,power_law_1.01,0.0988864004611969
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,1,8,power_law_1.01,0.10371840000152588
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,1,8,power_law_1.01,0.10521600246429444
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,1,8,power_law_1.01,0.10625920295715333
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,1,8,power_law_1.01,0.11129599809646606
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,1,8,power_law_1.01,0.11358079910278321
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,1,8,power_law_1.01,0.11795200109481811
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,1,8,power_law_1.01,0.11938560009002686
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,1,8,power_law_1.01,0.12817920446395875
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,1,8,power_law_1.01,0.14218239784240722
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,1,8,power_law_1.01,0.15094399452209473
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,1,8,power_law_1.01,0.17681280374526978
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,1,8,power_law_1.01,0.19415680170059205
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,1,8,power_law_1.01,0.23973119258880615
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,1,8,power_law_1.01,0.27891199588775634
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,1,8,power_law_1.01,0.3648960113525391
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,1,8,power_law_1.01,0.4609792232513428
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,1,8,power_law_1.01,0.6683712005615234
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,1,8,power_law_1.01,0.7788288116455078
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,2,16,power_law_1.01,0.05724800229072571
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,2,16,power_law_1.01,0.05668479800224304
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,2,16,power_law_1.01,0.055871999263763426
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,2,16,power_law_1.01,0.055174398422241214
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,2,16,power_law_1.01,0.05444480180740356
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,2,16,power_law_1.01,0.05419520139694214
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,2,16,power_law_1.01,0.056403201818466184
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,2,16,power_law_1.01,0.05708799958229065
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,2,16,power_law_1.01,0.057574397325515746
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,2,16,power_law_1.01,0.05594879984855652
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,2,16,power_law_1.01,0.05804160237312317
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,2,16,power_law_1.01,0.06106240153312683
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,2,16,power_law_1.01,0.06064000129699707
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,2,16,power_law_1.01,0.06098560094833374
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,2,16,power_law_1.01,0.0679423987865448
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,2,16,power_law_1.01,0.06963199973106385
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,2,16,power_law_1.01,0.07405440211296081
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,2,16,power_law_1.01,0.08468480110168457
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,2,16,power_law_1.01,0.08963199853897094
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,2,16,power_law_1.01,0.10776959657669068
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,2,16,power_law_1.01,0.11626240015029907
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,2,16,power_law_1.01,0.1533951997756958
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,2,16,power_law_1.01,0.17240320444107055
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,2,16,power_law_1.01,0.22940800189971924
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,2,16,power_law_1.01,0.3121279954910278
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,2,16,power_law_1.01,0.4140927791595459
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,2,16,power_law_1.01,0.5192831993103028
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,8,4,power_law_1.01,0.05178239941596985
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,8,4,power_law_1.01,0.0542527973651886
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,8,4,power_law_1.01,0.05702400207519531
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,8,4,power_law_1.01,0.06181120276451111
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,8,4,power_law_1.01,0.0654528021812439
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,8,4,power_law_1.01,0.07169280052185059
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,8,4,power_law_1.01,0.07902719974517822
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,8,4,power_law_1.01,0.07546240091323853
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,8,4,power_law_1.01,0.07942399978637696
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,8,4,power_law_1.01,0.07804160118103028
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,8,4,power_law_1.01,0.07869439721107482
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,8,4,power_law_1.01,0.08025599718093872
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,8,4,power_law_1.01,0.08172799944877625
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,8,4,power_law_1.01,0.08479999899864196
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,8,4,power_law_1.01,0.09224960207939148
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,8,4,power_law_1.01,0.09412479996681214
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,2,8,power_law_1.01,0.05334399938583374
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,8,4,power_law_1.01,0.09950079917907714
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,2,8,power_law_1.01,0.05570560097694397
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,2,8,power_law_1.01,0.06107519865036011
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,2,8,power_law_1.01,0.060659199953079224
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,2,8,power_law_1.01,0.06711680293083191
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,2,8,power_law_1.01,0.07429119944572449
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,8,4,power_law_1.01,0.10976639986038209
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,2,8,power_law_1.01,0.0695743978023529
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,8,4,power_law_1.01,0.12367360591888428
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,2,8,power_law_1.01,0.06930559873580933
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,8,4,power_law_1.01,0.14758399724960328
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,2,8,power_law_1.01,0.07599999904632568
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,8,4,power_law_1.01,0.16703360080718993
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,2,8,power_law_1.01,0.07214720249176025
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,8,4,power_law_1.01,0.21365759372711182
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,2,8,power_law_1.01,0.07723519802093506
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,8,4,power_law_1.01,0.2593024015426636
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,2,8,power_law_1.01,0.07441279888153077
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,8,4,power_law_1.01,0.35201919078826904
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,2,8,power_law_1.01,0.07767680287361145
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,2,8,power_law_1.01,0.07871999740600585
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,8,4,power_law_1.01,0.45783038139343263
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,2,8,power_law_1.01,0.08788480162620545
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,8,4,power_law_1.01,0.6349376201629638
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,2,8,power_law_1.01,0.08755199909210205
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,2,8,power_law_1.01,0.09462400078773499
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,8,4,power_law_1.01,0.8629759788513184
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,2,8,power_law_1.01,0.10535039901733398
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,2,8,power_law_1.01,0.11361919641494751
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,2,8,power_law_1.01,0.1355072021484375
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,2,8,power_law_1.01,0.15756800174713134
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,2,8,power_law_1.01,0.1844480037689209
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,2,8,power_law_1.01,0.2247488021850586
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,2,8,power_law_1.01,0.30076799392700193
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,2,8,power_law_1.01,0.36960639953613283
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,2,8,power_law_1.01,0.5151231765747071
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,2,8,power_law_1.01,0.7013567924499512
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,16,8,power_law_1.2,0.0469184011220932
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,16,8,power_law_1.2,0.050316798686981204
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,16,8,power_law_1.2,0.04529280066490173
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,16,8,power_law_1.2,0.0490880012512207
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,16,8,power_law_1.2,0.04864639937877655
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,16,8,power_law_1.2,0.050400000810623166
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,16,8,power_law_1.2,0.050521600246429446
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,16,8,power_law_1.2,0.0502016007900238
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,16,8,power_law_1.2,0.051923197507858274
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,16,8,power_law_1.2,0.050432002544403075
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,16,8,power_law_1.2,0.051558399200439455
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,16,8,power_law_1.2,0.05231999754905701
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,16,8,power_law_1.2,0.055641597509384154
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,16,8,power_law_1.2,0.0555840015411377
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,16,8,power_law_1.2,0.0621504008769989
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,16,8,power_law_1.2,0.06394240260124207
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,16,8,power_law_1.2,0.06995199918746949
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,16,8,power_law_1.2,0.08136320114135742
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,16,8,power_law_1.2,0.08368639945983887
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,16,8,power_law_1.2,0.09638400077819824
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,16,8,power_law_1.2,0.11744639873504639
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,16,8,power_law_1.2,0.14440319538116456
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,16,8,power_law_1.2,0.1699136018753052
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,16,8,power_law_1.2,0.2529088020324707
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,16,8,power_law_1.2,0.3359935998916626
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,16,8,power_law_1.2,0.4390079975128174
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,16,8,power_law_1.2,0.4951168060302734
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,8,32,power_law_1.01,0.0425024002790451
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,8,32,power_law_1.01,0.04310399889945984
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,8,32,power_law_1.01,0.04352000057697296
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,8,32,power_law_1.01,0.044486400485038755
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,8,32,power_law_1.01,0.04428159892559051
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,8,32,power_law_1.01,0.044684800505638125
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,8,32,power_law_1.01,0.045228800177574156
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,8,32,power_law_1.01,0.045772799849510194
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,8,32,power_law_1.01,0.0463808000087738
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,8,32,power_law_1.01,0.04640640020370483
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,8,32,power_law_1.01,0.047993600368499756
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,8,32,power_law_1.01,0.04878720045089722
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,8,32,power_law_1.01,0.04837760031223297
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,8,32,power_law_1.01,0.05127679705619812
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,8,32,power_law_1.01,0.05502079725265503
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,8,32,power_law_1.01,0.056441599130630495
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,8,32,power_law_1.01,0.06090880036354065
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,2,128,balanced,0.04563199977080027
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,2,128,balanced,0.0458186666170756
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,2,128,balanced,0.04572799801826477
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,2,128,balanced,0.04765866696834564
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,2,128,balanced,0.04985600213209788
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,2,128,balanced,0.0499893327554067
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,2,128,balanced,0.0517439991235733
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,2,128,balanced,0.05192000170548757
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,2,128,balanced,0.05227200190226237
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,2,128,balanced,0.05179200073083242
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,2,128,balanced,0.05442133545875549
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,2,128,balanced,0.05587733288606008
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,2,128,balanced,0.053914666175842285
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,2,128,balanced,0.055957332253456116
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,2,128,balanced,0.058245331048965454
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,2,128,balanced,0.060421332716941833
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,2,128,balanced,0.06241600215435028
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,2,128,balanced,0.06647466619809468
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,2,128,balanced,0.07047466437021892
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,2,128,balanced,0.08050133287906647
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,2,128,balanced,0.08861333131790161
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,2,128,balanced,0.10301333665847778
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,2,128,balanced,0.12143466869990031
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,2,128,balanced,0.17903467019399008
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,8,32,power_law_1.01,0.06596480011940002
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,2,128,balanced,0.2223200003306071
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,2,128,balanced,0.30801065762837726
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,8,32,power_law_1.01,0.06947839856147767
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,2,128,balanced,0.388314684232076
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,8,32,power_law_1.01,0.08097919821739197
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,8,32,power_law_1.01,0.08957440257072449
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,8,32,power_law_1.01,0.1168511986732483
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,8,32,power_law_1.01,0.14410879611968994
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,8,32,power_law_1.01,0.20577919483184814
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,8,32,power_law_1.01,0.2634943962097168
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,8,32,power_law_1.01,0.3608704090118408
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,8,32,power_law_1.01,0.46622719764709475
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,1,16,power_law_1.01,0.025216001272201537
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,1,16,power_law_1.01,0.040780800580978396
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,1,16,power_law_1.01,0.036320000886917114
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,1,16,power_law_1.01,0.03781760036945343
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,1,16,power_law_1.01,0.03790079951286316
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,1,16,power_law_1.01,0.032364800572395325
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,1,16,power_law_1.01,0.03619199991226196
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,1,16,power_law_1.01,0.03852159976959228
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,1,16,power_law_1.01,0.038764798641204835
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,1,16,power_law_1.01,0.04012160003185272
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,1,16,power_law_1.01,0.03889279961585999
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,1,16,power_law_1.01,0.04017280042171478
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,1,16,power_law_1.01,0.04128639996051788
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,1,16,power_law_1.01,0.04917120039463043
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,1,16,power_law_1.01,0.05322880148887634
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,1,16,power_law_1.01,0.05271040201187134
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,1,16,power_law_1.01,0.06964480280876159
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,1,16,power_law_1.01,0.08479359745979309
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,1,16,power_law_1.01,0.0871295988559723
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,1,16,power_law_1.01,0.11860480308532714
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,1,16,power_law_1.01,0.08474239706993103
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,1,16,power_law_1.01,0.09855999946594238
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,1,16,power_law_1.01,0.11235840320587158
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,1,16,power_law_1.01,0.14883840084075928
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,1,16,power_law_1.01,0.19107199907302858
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,4,2,power_law_1.2,0.07228800058364868
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,1,16,power_law_1.01,0.2684416055679321
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,1,16,power_law_1.01,0.34156160354614257
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,4,2,power_law_1.2,0.08306559920310974
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,4,2,power_law_1.2,0.08636800050735474
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,4,2,power_law_1.2,0.10181119441986083
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,4,2,power_law_1.2,0.11864960193634033
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,4,2,power_law_1.2,0.13375999927520751
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,4,2,power_law_1.2,0.1559999942779541
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,4,2,power_law_1.2,0.15970560312271118
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,4,2,power_law_1.2,0.16374399662017822
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,4,2,power_law_1.2,0.17011200189590453
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,4,2,power_law_1.2,0.165555202960968
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,4,2,power_law_1.2,0.17500799894332886
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,4,2,power_law_1.2,0.17241599559783935
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,4,2,power_law_1.2,0.18403199911117554
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,4,2,power_law_1.2,0.18997759819030763
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,4,2,power_law_1.2,0.18963840007781982
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,4,2,power_law_1.2,0.1995519995689392
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,4,2,power_law_1.2,0.22468481063842774
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,4,2,power_law_1.2,0.24688000679016114
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,4,2,power_law_1.2,0.28304638862609866
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,4,2,power_law_1.2,0.34410240650177004
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,4,2,power_law_1.2,0.42045440673828127
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,4,2,power_law_1.2,0.48978562355041505
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,4,2,power_law_1.2,0.6876160144805908
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,4,2,power_law_1.2,0.8797183990478515
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,4,2,power_law_1.2,1.1794367790222169
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,4,2,power_law_1.2,1.5177215576171874
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,4,1,power_law_1.2,0.0517632007598877
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,4,1,power_law_1.2,0.06418560147285461
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,4,1,power_law_1.2,0.07756159901618957
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,4,1,power_law_1.2,0.1023743987083435
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,4,1,power_law_1.2,0.13382400274276735
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,4,1,power_law_1.2,0.16495360136032106
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,4,1,power_law_1.2,0.22501120567321778
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,4,1,power_law_1.2,0.23562240600585938
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,4,1,power_law_1.2,0.24542078971862794
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,4,1,power_law_1.2,0.25383040904998777
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,4,1,power_law_1.2,0.25598719120025637
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,4,1,power_law_1.2,0.2755647897720337
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,4,1,power_law_1.2,0.27905280590057374
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,4,1,power_law_1.2,0.28691840171813965
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,4,1,power_law_1.2,0.30147199630737304
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,4,1,power_law_1.2,0.31623680591583253
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,4,1,power_law_1.2,0.32578558921813966
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,4,1,power_law_1.2,0.3619071960449219
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,4,1,power_law_1.2,0.3796224117279053
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,4,1,power_law_1.2,0.440780782699585
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,4,1,power_law_1.2,0.49602560997009276
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,4,1,power_law_1.2,0.6011072158813476
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,4,1,power_law_1.2,0.701478385925293
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,4,1,power_law_1.2,0.9070464134216308
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,4,1,power_law_1.2,1.1193599700927734
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,4,1,power_law_1.2,1.5213184356689453
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,4,1,power_law_1.2,1.9082304000854493
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,4,32,power_law_1.2,0.049772799015045166
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,4,32,power_law_1.2,0.0444927990436554
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,4,32,power_law_1.2,0.044870400428771974
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,4,32,power_law_1.2,0.043910399079322815
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,4,32,power_law_1.2,0.044172799587249754
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,4,32,power_law_1.2,0.044736000895500186
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,4,32,power_law_1.2,0.04522239863872528
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,4,32,power_law_1.2,0.04649600088596344
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,4,32,power_law_1.2,0.04599039852619171
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,4,32,power_law_1.2,0.046982398629188536
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,4,32,power_law_1.2,0.048640000820159915
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,4,32,power_law_1.2,0.049344000220298764
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,4,32,power_law_1.2,0.05018240213394165
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,4,32,power_law_1.2,0.0523904025554657
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,4,32,power_law_1.2,0.05624319911003113
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,4,32,power_law_1.2,0.057920002937316896
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,4,32,power_law_1.2,0.06313599944114685
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,4,32,power_law_1.2,0.06996480226516724
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,4,32,power_law_1.2,0.07246080040931702
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,4,32,power_law_1.2,0.08465920090675354
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,4,32,power_law_1.2,0.09694719910621644
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,4,32,power_law_1.2,0.13048959970474244
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,4,32,power_law_1.2,0.16689280271530152
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,4,32,power_law_1.2,0.23601920604705812
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,4,32,power_law_1.2,0.2880959987640381
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,4,32,power_law_1.2,0.41938557624816897
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,4,32,power_law_1.2,0.5054783821105957
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,2,2,power_law_1.01,0.05230720043182373
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,2,2,power_law_1.01,0.06321920156478882
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,2,2,power_law_1.01,0.07431039810180665
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,2,2,power_law_1.01,0.08855040073394775
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,2,2,power_law_1.01,0.10598399639129638
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,2,2,power_law_1.01,0.12551679611206054
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,2,2,power_law_1.01,0.15218559503555298
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,2,2,power_law_1.01,0.15047680139541625
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,2,2,power_law_1.01,0.15429760217666627
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,2,2,power_law_1.01,0.1560960054397583
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,2,2,power_law_1.01,0.15692800283432007
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,2,2,power_law_1.01,0.16874239444732667
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,2,2,power_law_1.01,0.17150720357894897
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,2,2,power_law_1.01,0.17770880460739136
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,2,2,power_law_1.01,0.18413439989089966
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,2,2,power_law_1.01,0.19454079866409302
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,2,2,power_law_1.01,0.20546560287475585
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,2,2,power_law_1.01,0.23142399787902831
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,2,2,power_law_1.01,0.24817919731140137
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,2,2,power_law_1.01,0.291974401473999
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,2,2,power_law_1.01,0.3226815938949585
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,2,2,power_law_1.01,0.4099584102630615
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,2,2,power_law_1.01,0.4936511993408203
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,2,2,power_law_1.01,0.653056001663208
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,2,2,power_law_1.01,0.8185983657836914
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,2,2,power_law_1.01,1.1112768173217773
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,2,2,power_law_1.01,1.4073727607727051
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,2,16,power_law_1.01,0.059334397315979004
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,2,16,power_law_1.01,0.07214080095291138
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,2,16,power_law_1.01,0.07528319954872131
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,2,16,power_law_1.01,0.0747648000717163
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,2,16,power_law_1.01,0.07564799785614014
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,2,16,power_law_1.01,0.07057920098304749
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,2,16,power_law_1.01,0.06940799951553345
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,2,16,power_law_1.01,0.07114880084991455
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,2,16,power_law_1.01,0.07191039919853211
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,2,16,power_law_1.01,0.07234560251235962
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,2,16,power_law_1.01,0.07603840231895446
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,2,16,power_law_1.01,0.07872639894485474
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,2,16,power_law_1.01,0.08071039915084839
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,2,16,power_law_1.01,0.08175359964370728
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,2,16,power_law_1.01,0.08848000168800355
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,2,16,power_law_1.01,0.08922880291938781
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,2,16,power_law_1.01,0.08977280259132385
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,2,16,power_law_1.01,0.09959040284156799
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,2,16,power_law_1.01,0.10400639772415161
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,2,16,power_law_1.01,0.11927679777145386
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,2,16,power_law_1.01,0.12869759798049926
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,2,16,power_law_1.01,0.15192960500717162
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,2,16,power_law_1.01,0.179967999458313
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,2,16,power_law_1.01,0.22417919635772704
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,2,16,power_law_1.01,0.2893440008163452
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,2,16,power_law_1.01,0.4070335865020752
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,2,16,power_law_1.01,0.4859456062316895
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,4,64,balanced,0.04990933338801066
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,4,64,balanced,0.04177066683769226
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,4,64,balanced,0.04009599983692169
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,4,64,balanced,0.041696002086003624
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,4,64,balanced,0.04192000130812327
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,4,64,balanced,0.044064000248909
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,4,64,balanced,0.04514666895071665
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,4,64,balanced,0.045696000258127846
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,4,64,balanced,0.04566933214664459
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,4,64,balanced,0.04571733375390371
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,4,64,balanced,0.047770669062932335
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,4,64,balanced,0.04595200220743815
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,4,64,balanced,0.04816000163555145
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,4,64,balanced,0.04867733518282572
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,4,64,balanced,0.05211733281612396
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,4,64,balanced,0.054154664278030396
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,4,64,balanced,0.05585066477457682
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,4,64,balanced,0.0621013343334198
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,4,64,balanced,0.06483200192451477
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,4,64,balanced,0.07232533395290375
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,4,64,balanced,0.08086400230725606
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,4,64,balanced,0.09734933574994405
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,4,64,balanced,0.11185600360234578
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,4,64,balanced,0.1698240041732788
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,4,64,balanced,0.20807466904322305
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,4,64,balanced,0.2895786762237549
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,4,64,balanced,0.3652000029881795
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,4,4,power_law_1.01,0.040908798575401306
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,4,4,power_law_1.01,0.05398399829864502
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,4,4,power_law_1.01,0.06154239773750305
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,4,4,power_law_1.01,0.0720192015171051
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,4,4,power_law_1.01,0.07853440046310425
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,4,4,power_law_1.01,0.07874559760093688
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,4,4,power_law_1.01,0.10466560125350952
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,4,4,power_law_1.01,0.09996799826622009
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,4,4,power_law_1.01,0.10440319776535034
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,4,4,power_law_1.01,0.0979200005531311
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,4,4,power_law_1.01,0.10711679458618165
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,4,4,power_law_1.01,0.10768640041351318
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,4,4,power_law_1.01,0.10981760025024415
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,4,4,power_law_1.01,0.11181440353393554
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,4,4,power_law_1.01,0.12027519941329956
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,4,4,power_law_1.01,0.11921919584274292
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,4,4,power_law_1.01,0.12486399412155151
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,4,4,power_law_1.01,0.13696639537811278
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,4,4,power_law_1.01,0.14613120555877684
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,4,4,power_law_1.01,0.16717439889907837
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,4,4,power_law_1.01,0.18451199531555176
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,4,4,power_law_1.01,0.2306368112564087
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,4,4,power_law_1.01,0.27168641090393064
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,4,4,power_law_1.01,0.36533119678497317
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,4,4,power_law_1.01,0.42120962142944335
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,4,4,power_law_1.01,0.6113984107971191
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,4,4,power_law_1.01,0.7462207794189453
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,8,2,balanced,0.04604800045490265
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,8,2,balanced,0.04764266808827718
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,8,2,balanced,0.049973333875338234
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,8,2,balanced,0.06066666543483734
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,8,2,balanced,0.08077866832415263
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,8,2,balanced,0.12504000465075174
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,8,2,balanced,0.12582932909329733
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,8,2,balanced,0.12244266271591187
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,8,2,balanced,0.1213653286298116
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,8,2,balanced,0.12461333473523457
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,8,2,balanced,0.12452266613642375
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,8,2,balanced,0.1278986632823944
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,8,2,balanced,0.12846400340398154
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,8,2,balanced,0.1308693289756775
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,8,2,balanced,0.13917332887649536
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,8,2,balanced,0.14054399728775024
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,8,2,balanced,0.14550933241844177
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,8,2,balanced,0.15974400440851846
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,8,2,balanced,0.1673120061556498
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,8,2,balanced,0.19285867611567178
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,8,2,balanced,0.2152000069618225
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,8,2,balanced,0.28142400582631427
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,8,2,balanced,0.30905065933863324
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,8,2,balanced,0.4392746686935425
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,8,2,balanced,0.5286826690038046
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,8,2,balanced,0.7391413052876791
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,8,2,balanced,0.9663413365681967
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,4,8,power_law_1.01,0.0429504007101059
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,4,8,power_law_1.01,0.047628799080848695
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,4,8,power_law_1.01,0.04845440089702606
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,4,8,power_law_1.01,0.05555840134620667
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,4,8,power_law_1.01,0.05607039928436279
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,4,8,power_law_1.01,0.05752320289611816
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,4,8,power_law_1.01,0.055852800607681274
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,4,8,power_law_1.01,0.057785600423812866
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,4,8,power_law_1.01,0.0588096022605896
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,4,8,power_law_1.01,0.05798400044441223
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,4,8,power_law_1.01,0.062463998794555664
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,4,8,power_law_1.01,0.06589440107345582
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,4,8,power_law_1.01,0.06525440216064453
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,4,8,power_law_1.01,0.06972159743309021
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,8,4,power_law_1.2,0.04688639938831329
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,4,8,power_law_1.01,0.07713279724121094
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,4,8,power_law_1.01,0.0805184006690979
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,8,4,power_law_1.2,0.04757120013237
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,4,8,power_law_1.01,0.08295680284500122
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,8,4,power_law_1.2,0.05090559720993042
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,4,8,power_law_1.01,0.09588479995727539
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,8,4,power_law_1.2,0.05804160237312317
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,4,8,power_law_1.01,0.10249600410461426
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,8,4,power_law_1.2,0.0617792010307312
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,4,8,power_law_1.01,0.12558720111846924
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,8,4,power_law_1.2,0.0656000018119812
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,8,4,power_law_1.2,0.07991039752960205
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,8,4,power_law_1.2,0.0769536018371582
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,8,4,power_law_1.2,0.07623040080070495
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,8,4,power_law_1.2,0.07765759825706482
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,8,4,power_law_1.2,0.0809984028339386
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,4,8,power_law_1.01,0.1392832040786743
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,8,4,power_law_1.2,0.08478720188140869
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,4,8,power_law_1.01,0.17777279615402222
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,8,4,power_law_1.2,0.08863360285758973
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,4,8,power_law_1.01,0.2180864095687866
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,8,4,power_law_1.2,0.09015679955482483
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,4,8,power_law_1.01,0.2786495923995972
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,8,4,power_law_1.2,0.09872639775276185
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,4,8,power_law_1.01,0.3521023988723755
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,8,4,power_law_1.2,0.09970560073852539
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,8,4,power_law_1.2,0.10252799987792968
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,4,8,power_law_1.01,0.5013055801391602
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,8,4,power_law_1.2,0.1149183988571167
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,4,8,power_law_1.01,0.6799615859985352
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,8,4,power_law_1.2,0.1308735966682434
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,8,4,power_law_1.2,0.15534720420837403
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,8,4,power_law_1.2,0.16248960494995118
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,8,4,power_law_1.2,0.20888960361480713
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,8,4,power_law_1.2,0.24803199768066406
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,8,4,power_law_1.2,0.33980159759521483
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,8,4,power_law_1.2,0.4322944164276123
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,8,4,power_law_1.2,0.578329610824585
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,8,4,power_law_1.2,0.7512639999389649
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,2,128,balanced,0.04675200084845225
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,2,128,balanced,0.057946667075157166
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,2,128,balanced,0.04190400242805481
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,2,128,balanced,0.045552000403404236
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,2,128,balanced,0.04402133325735728
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,2,128,balanced,0.048021331429481506
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,2,128,balanced,0.04571733375390371
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,2,128,balanced,0.04580800235271454
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,2,128,balanced,0.046015997727712
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,2,128,balanced,0.046821330984433494
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,2,128,balanced,0.04571733375390371
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,2,128,balanced,0.047914668917655945
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,2,128,balanced,0.04786666731039683
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,2,128,balanced,0.04970666766166687
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,2,128,balanced,0.05153599878152212
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,2,128,balanced,0.052154665191968284
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,2,128,balanced,0.05382933219273885
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,2,128,balanced,0.05815466741720835
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,2,128,balanced,0.060821334520975746
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,2,128,balanced,0.06640533109505971
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,2,128,balanced,0.07050133248170216
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,2,128,balanced,0.08542933066685994
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,2,128,balanced,0.0972160001595815
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,2,128,balanced,0.1225920021533966
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,2,128,balanced,0.1525173286596934
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,2,128,balanced,0.20454400777816772
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,2,128,balanced,0.2543413241704305
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,4,2,power_law_1.2,0.0468095988035202
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,4,2,power_law_1.2,0.057126402854919434
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,4,2,power_law_1.2,0.06240000128746033
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,4,2,power_law_1.2,0.07863039970397949
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,4,2,power_law_1.2,0.09811199903488159
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,4,2,power_law_1.2,0.11227519512176513
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,4,2,power_law_1.2,0.14750720262527467
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,4,2,power_law_1.2,0.1493183970451355
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,4,2,power_law_1.2,0.14977920055389404
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,4,2,power_law_1.2,0.15916800498962402
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,4,2,power_law_1.2,0.1567296028137207
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,4,2,power_law_1.2,0.166758394241333
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,4,2,power_law_1.2,0.16951680183410645
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,4,2,power_law_1.2,0.16895999908447265
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,4,2,power_law_1.2,0.18082560300827027
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,4,2,power_law_1.2,0.1812351942062378
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,4,2,power_law_1.2,0.19877760410308837
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,4,2,power_law_1.2,0.20901761054992676
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,4,2,power_law_1.2,0.23843839168548583
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,4,2,power_law_1.2,0.2791296005249023
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,4,2,power_law_1.2,0.3001919984817505
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,4,2,power_law_1.2,0.38179841041564944
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,4,2,power_law_1.2,0.43025918006896974
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,4,2,power_law_1.2,0.5492800235748291
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,4,2,power_law_1.2,0.66844801902771
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,4,2,power_law_1.2,0.9327168464660645
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,4,2,power_law_1.2,1.1967424392700194
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,2,1,balanced,0.060090666015942894
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,2,1,balanced,0.07874133189519246
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,2,1,balanced,0.11222400267918904
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,2,1,balanced,0.17125866810480753
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,2,1,balanced,0.29576534032821655
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,2,1,balanced,0.5398133198420206
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,2,1,balanced,0.5427466630935669
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,2,1,balanced,0.5422613223393759
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,2,1,balanced,0.5466773509979248
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,2,1,balanced,0.5470186471939087
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,2,1,balanced,0.5505760113398234
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,2,1,balanced,0.5569760004679362
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,2,1,balanced,0.5605546633402506
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,2,1,balanced,0.5643626848856608
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,2,1,balanced,0.571237325668335
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,2,1,balanced,0.5787306626637777
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,2,1,balanced,0.5859359900156657
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,2,1,balanced,0.6142026583353678
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,2,1,balanced,0.6363573471705118
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,2,1,balanced,0.689680020014445
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,2,1,balanced,0.7413919766743978
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,2,1,balanced,0.8136906623840332
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,2,1,balanced,0.9327840010325114
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,2,1,balanced,1.1561919848124187
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,2,1,balanced,1.2837653160095215
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,2,1,balanced,1.8223199844360352
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,2,1,balanced,2.153589407602946
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,8,1,power_law_1.2,0.04984320104122162
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,8,1,power_law_1.2,0.05315200090408325
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,8,1,power_law_1.2,0.0584447979927063
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,8,1,power_law_1.2,0.06884480118751526
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,8,1,power_law_1.2,0.082777601480484
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,8,1,power_law_1.2,0.09325439929962158
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,8,1,power_law_1.2,0.11530239582061767
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,8,1,power_law_1.2,0.11996159553527833
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,8,1,power_law_1.2,0.12319359779357911
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,8,1,power_law_1.2,0.12802560329437257
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,8,1,power_law_1.2,0.13445760011672975
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,8,1,power_law_1.2,0.14197759628295897
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,8,1,power_law_1.2,0.1488703966140747
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,8,1,power_law_1.2,0.15887999534606934
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,8,1,power_law_1.2,0.17385599613189698
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,8,1,power_law_1.2,0.1839359998703003
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,8,1,power_law_1.2,0.18298879861831666
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,8,1,power_law_1.2,0.21622400283813475
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,8,1,power_law_1.2,0.24437119960784912
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,8,1,power_law_1.2,0.30474879741668703
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,8,1,power_law_1.2,0.3597887992858887
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,8,1,power_law_1.2,0.47694082260131837
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,8,1,power_law_1.2,0.5837759971618652
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,8,1,power_law_1.2,0.801632022857666
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,8,1,power_law_1.2,1.021951961517334
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,8,1,power_law_1.2,1.4605695724487304
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,8,1,power_law_1.2,1.9056127548217774
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,1,16,power_law_1.01,0.0825215995311737
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,1,16,power_law_1.01,0.11050239801406861
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,1,16,power_law_1.01,0.10401920080184937
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,1,16,power_law_1.01,0.11014399528503419
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,1,16,power_law_1.01,0.10772479772567749
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,1,16,power_law_1.01,0.09844480156898498
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,1,16,power_law_1.01,0.0955456018447876
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,1,16,power_law_1.01,0.0986240029335022
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,1,16,power_law_1.01,0.09864320158958435
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,1,16,power_law_1.01,0.09646720290184022
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,1,16,power_law_1.01,0.09726719856262207
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,1,16,power_law_1.01,0.0933184027671814
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,1,16,power_law_1.01,0.10026880502700805
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,1,16,power_law_1.01,0.10008319616317748
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,1,16,power_law_1.01,0.10317440032958984
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,1,16,power_law_1.01,0.1032256007194519
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,1,16,power_law_1.01,0.1078976035118103
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,1,16,power_law_1.01,0.11572480201721191
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,1,16,power_law_1.01,0.12324479818344117
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,1,16,power_law_1.01,0.1405311942100525
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,1,16,power_law_1.01,0.16190719604492188
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,1,16,power_law_1.01,0.19040640592575073
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,1,16,power_law_1.01,0.23305599689483641
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,1,16,power_law_1.01,0.2988863945007324
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,1,16,power_law_1.01,0.36937599182128905
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,1,16,power_law_1.01,0.5032639980316163
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,1,16,power_law_1.01,0.6149312019348144
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,4,16,power_law_1.01,0.01822720021009445
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,4,16,power_law_1.01,0.019577600061893463
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,4,16,power_law_1.01,0.02035840004682541
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,4,16,power_law_1.01,0.019763199985027312
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,4,16,power_law_1.01,0.02072319984436035
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,4,16,power_law_1.01,0.02176000028848648
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,4,16,power_law_1.01,0.023801599442958832
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,4,16,power_law_1.01,0.02401279956102371
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,4,16,power_law_1.01,0.02773120105266571
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,4,16,power_law_1.01,0.02784000039100647
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,4,16,power_law_1.01,0.027193599939346315
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,4,16,power_law_1.01,0.03992320001125336
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,4,16,power_law_1.01,0.041126400232315063
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,4,16,power_law_1.01,0.04097920060157776
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,4,16,power_law_1.01,0.04373759925365448
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,4,16,power_law_1.01,0.04259839951992035
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,4,16,power_law_1.01,0.04445439875125885
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,4,16,power_law_1.01,0.045184001326560974
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,4,16,power_law_1.01,0.04490880072116852
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,4,16,power_law_1.01,0.049721598625183105
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,4,16,power_law_1.01,0.05542399883270264
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,4,16,power_law_1.01,0.06839039921760559
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,4,16,power_law_1.01,0.07929599881172181
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,4,16,power_law_1.01,0.10491520166397095
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,4,16,power_law_1.01,0.12426879405975341
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,4,16,power_law_1.01,0.1678976058959961
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,4,16,power_law_1.01,0.22109439373016357
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,4,64,power_law_1.01,0.04752640128135681
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,4,64,power_law_1.01,0.04190720021724701
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,4,64,power_law_1.01,0.04164479970932007
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,4,64,power_law_1.01,0.04436480104923248
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,4,64,power_law_1.01,0.041740798950195314
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,4,64,power_law_1.01,0.04225279986858368
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,4,64,power_law_1.01,0.04312959909439087
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,4,64,power_law_1.01,0.04465279877185822
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,4,64,power_law_1.01,0.0449535995721817
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,4,64,power_law_1.01,0.04496639966964722
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,4,64,power_law_1.01,0.04469119906425476
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,4,64,power_law_1.01,0.046054399013519286
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,4,64,power_law_1.01,0.04652799963951111
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,4,64,power_law_1.01,0.04686079919338226
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,4,64,power_law_1.01,0.05223039984703064
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,4,64,power_law_1.01,0.052665597200393675
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,4,64,power_law_1.01,0.055193597078323366
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,4,64,power_law_1.01,0.06099839806556702
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,4,64,power_law_1.01,0.06584960222244263
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,4,64,power_law_1.01,0.07681279778480529
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,4,64,power_law_1.01,0.08047360181808472
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,4,64,power_law_1.01,0.10592639446258545
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,4,64,power_law_1.01,0.12225279808044434
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,4,64,power_law_1.01,0.15841920375823976
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,4,64,power_law_1.01,0.1987712025642395
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,4,64,power_law_1.01,0.2698944091796875
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,4,64,power_law_1.01,0.3249216079711914
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,1,256,balanced,0.060090666015942894
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,1,256,balanced,0.06643199920654297
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,1,256,balanced,0.0435146689414978
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,1,256,balanced,0.04560000201066335
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,1,256,balanced,0.045552000403404236
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,1,256,balanced,0.04789866507053375
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,1,256,balanced,0.04614933331807455
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,1,256,balanced,0.04580800235271454
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,1,256,balanced,0.045850664377212524
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,1,256,balanced,0.04758933186531067
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,1,256,balanced,0.04558399816354116
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,1,256,balanced,0.04818133513132731
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,1,256,balanced,0.04796266555786133
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,1,256,balanced,0.04986133178075155
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,1,256,balanced,0.05171733101209005
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,1,256,balanced,0.05305066704750061
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,1,256,balanced,0.05385600030422211
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,1,256,balanced,0.0581226646900177
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,1,256,balanced,0.06121600170930227
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,1,256,balanced,0.06663999954859416
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,1,256,balanced,0.07050666709740956
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,1,256,balanced,0.08477333188056946
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,1,256,balanced,0.09499733646710713
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,1,256,balanced,0.12004799644152324
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,1,256,balanced,0.15043733517328897
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,1,256,balanced,0.20204800367355347
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,1,256,balanced,0.25040533145268756
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,32,4,balanced,0.0394400010506312
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,32,4,balanced,0.041946664452552795
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,32,4,balanced,0.04357333481311798
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,32,4,balanced,0.043893332282702126
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,32,4,balanced,0.04757866760094961
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,32,4,balanced,0.056032001972198486
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,32,4,balanced,0.05418133238951365
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,32,4,balanced,0.05643199880917867
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,32,4,balanced,0.05619200070699056
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,32,4,balanced,0.05593066910902659
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,32,4,balanced,0.05715199808279673
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,32,4,balanced,0.05840000013510386
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,32,4,balanced,0.05835199852784475
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,32,4,balanced,0.057989334066708885
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,32,4,balanced,0.06446933249632518
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,32,4,balanced,0.06428266565004985
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,32,4,balanced,0.06865600248177846
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,32,4,balanced,0.0769760012626648
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,32,4,balanced,0.09885332981745402
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,32,4,balanced,0.11548800269762675
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,32,4,balanced,0.13515200217564902
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,32,4,balanced,0.1686506668726603
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,32,4,balanced,0.18594133853912354
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,32,4,balanced,0.27749866247177124
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,32,4,balanced,0.33111466964085895
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,32,4,balanced,0.47355198860168457
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,32,4,balanced,0.6041066646575928
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,1,2,power_law_1.01,0.08033279776573181
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,1,2,power_law_1.01,0.11574399471282959
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,1,2,power_law_1.01,0.1368064045906067
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,1,2,power_law_1.01,0.19857920408248902
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,1,2,power_law_1.01,0.2547264099121094
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,1,2,power_law_1.01,0.3170367956161499
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,1,2,power_law_1.01,0.4219071865081787
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,1,2,power_law_1.01,0.4250944137573242
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,1,2,power_law_1.01,0.46253437995910646
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,1,2,power_law_1.01,0.4479936122894287
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,1,2,power_law_1.01,0.4483007907867432
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,1,2,power_law_1.01,0.46652798652648925
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,1,2,power_law_1.01,0.4529088020324707
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,1,2,power_law_1.01,0.487667179107666
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,1,2,power_law_1.01,0.4998591899871826
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,1,2,power_law_1.01,0.5111936092376709
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,1,2,power_law_1.01,0.5462207794189453
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,1,2,power_law_1.01,0.6000192165374756
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,1,2,power_law_1.01,0.5843264102935791
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,1,2,power_law_1.01,0.6730751991271973
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,1,2,power_law_1.01,0.6737023830413819
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,1,2,power_law_1.01,0.7898047924041748
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,1,2,power_law_1.01,0.8435711860656738
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,1,2,power_law_1.01,1.072704029083252
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,1,2,power_law_1.01,1.1985664367675781
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,1,2,power_law_1.01,1.586451244354248
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,1,2,power_law_1.01,1.9969215393066406
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,1,16,balanced,0.05002133548259735
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,1,16,balanced,0.052426666021347046
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,1,16,balanced,0.0521919975678126
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,1,16,balanced,0.05818133552869161
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,1,16,balanced,0.07109866539637248
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,1,16,balanced,0.0928000013033549
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,1,16,balanced,0.09382933378219604
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,1,16,balanced,0.0956213374932607
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,1,16,balanced,0.09502399961153667
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,1,16,balanced,0.09638399879137675
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,1,16,balanced,0.09597866733868916
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,1,16,balanced,0.09656533598899841
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,1,16,balanced,0.09748799602190654
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,1,16,balanced,0.09835732976595561
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,1,16,balanced,0.1050986647605896
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,1,16,balanced,0.10682132840156555
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,1,16,balanced,0.10845333337783813
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,1,16,balanced,0.11331199606259663
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,1,16,balanced,0.11850133538246155
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,1,16,balanced,0.13080533345540366
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,1,16,balanced,0.1400320033232371
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,1,16,balanced,0.16699733336766562
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,1,16,balanced,0.18796267112096152
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,1,16,balanced,0.2462559938430786
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,1,16,balanced,0.29268266757329303
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,1,16,balanced,0.41204265753428143
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,1,16,balanced,0.5275359948476156
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,4,4,power_law_1.2,0.04405120015144348
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,4,4,power_law_1.2,0.05480319857597351
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,4,4,power_law_1.2,0.05761280059814453
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,4,4,power_law_1.2,0.07552639842033386
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,4,4,power_law_1.2,0.08154879808425904
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,4,4,power_law_1.2,0.088646399974823
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,4,4,power_law_1.2,0.09752960205078125
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,4,4,power_law_1.2,0.1035264015197754
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,4,4,power_law_1.2,0.10221439599990845
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,4,4,power_law_1.2,0.10643199682235718
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,4,4,power_law_1.2,0.10124160051345825
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,4,4,power_law_1.2,0.11189759969711303
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,4,4,power_law_1.2,0.10775680541992187
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,4,4,power_law_1.2,0.11472640037536622
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,4,4,power_law_1.2,0.12016639709472657
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,4,4,power_law_1.2,0.11858559846878051
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,4,4,power_law_1.2,0.12704000473022461
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,4,4,power_law_1.2,0.1391487956047058
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,4,4,power_law_1.2,0.14967679977416992
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,4,4,power_law_1.2,0.17331839799880983
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,4,4,power_law_1.2,0.19230079650878906
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,4,4,power_law_1.2,0.24412159919738768
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,4,4,power_law_1.2,0.276364803314209
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,4,4,power_law_1.2,0.3664639949798584
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,4,4,power_law_1.2,0.4583424091339111
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,4,4,power_law_1.2,0.6153408050537109
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,4,4,power_law_1.2,0.8307328224182129
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,1,16,power_law_1.2,0.08110719919204712
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,1,16,power_law_1.2,0.10632959604263306
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,1,16,power_law_1.2,0.10238720178604126
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,1,16,power_law_1.2,0.10750080347061157
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,1,16,power_law_1.2,0.10788480043411255
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,1,16,power_law_1.2,0.09192960262298584
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,1,16,power_law_1.2,0.09393919706344604
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,1,16,power_law_1.2,0.09311360120773315
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,1,16,power_law_1.2,0.09575039744377137
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,1,16,power_law_1.2,0.10200320482254029
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,1,16,power_law_1.2,0.09212160110473633
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,1,16,power_law_1.2,0.09976959824562073
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,1,16,power_law_1.2,0.10047999620437623
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,1,16,power_law_1.2,0.10051200389862061
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,1,16,power_law_1.2,0.10150400400161744
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,1,16,power_law_1.2,0.10495359897613525
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,1,16,power_law_1.2,0.11299840211868287
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,1,16,power_law_1.2,0.12379519939422608
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,1,16,power_law_1.2,0.1334272027015686
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,1,16,power_law_1.2,0.14793599843978883
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,1,16,power_law_1.2,0.16902400255203248
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,1,16,power_law_1.2,0.20956799983978272
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,1,16,power_law_1.2,0.27964799404144286
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,1,16,power_law_1.2,0.3220223903656006
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,1,16,power_law_1.2,0.4067840099334717
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,1,16,power_law_1.2,0.5966911792755127
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,1,16,power_law_1.2,0.6888448238372803
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,4,16,power_law_1.2,0.04665600061416626
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,4,16,power_law_1.2,0.05148800015449524
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,4,16,power_law_1.2,0.04940159916877747
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,4,16,power_law_1.2,0.05181440114974976
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,4,16,power_law_1.2,0.051974397897720334
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,4,16,power_law_1.2,0.0536191999912262
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,4,16,power_law_1.2,0.05542399883270264
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,4,16,power_law_1.2,0.05512319803237915
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,4,16,power_law_1.2,0.055731201171875
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,4,16,power_law_1.2,0.05632640123367309
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,4,16,power_law_1.2,0.05665919780731201
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,4,16,power_law_1.2,0.058361601829528806
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,4,16,power_law_1.2,0.0578495979309082
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,4,16,power_law_1.2,0.062003201246261595
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,4,16,power_law_1.2,0.07023360133171082
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,4,16,power_law_1.2,0.07152640223503112
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,4,16,power_law_1.2,0.07600640058517456
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,4,16,power_law_1.2,0.08384640216827392
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,4,16,power_law_1.2,0.09152640104293823
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,4,16,power_law_1.2,0.1137279987335205
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,4,16,power_law_1.2,0.13782399892807007
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,4,16,power_law_1.2,0.17651840448379516
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,4,16,power_law_1.2,0.21525120735168457
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,4,16,power_law_1.2,0.29799039363861085
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,4,16,power_law_1.2,0.39436800479888917
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,4,16,power_law_1.2,0.6046271800994873
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,4,16,power_law_1.2,0.7544640064239502
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,2,8,power_law_1.2,0.05084159970283508
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,2,8,power_law_1.2,0.05415679812431336
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,2,8,power_law_1.2,0.05484799742698669
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,2,8,power_law_1.2,0.06183680295944214
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,2,8,power_law_1.2,0.06667519807815551
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,2,8,power_law_1.2,0.0707647979259491
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,2,8,power_law_1.2,0.07173759937286377
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,2,8,power_law_1.2,0.06867200136184692
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,2,8,power_law_1.2,0.07109760046005249
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,2,8,power_law_1.2,0.07246080040931702
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,2,8,power_law_1.2,0.07742080092430115
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,2,8,power_law_1.2,0.07605119943618774
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,2,8,power_law_1.2,0.07917439937591553
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,2,8,power_law_1.2,0.08109440207481385
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,2,8,power_law_1.2,0.0887935996055603
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,2,8,power_law_1.2,0.08981760144233704
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,2,8,power_law_1.2,0.09564800262451172
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,2,8,power_law_1.2,0.10858240127563476
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,2,8,power_law_1.2,0.11256320476531982
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,2,8,power_law_1.2,0.13894399404525756
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,2,8,power_law_1.2,0.16766719818115233
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,2,8,power_law_1.2,0.19830399751663208
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,2,8,power_law_1.2,0.23519361019134521
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,2,8,power_law_1.2,0.30424959659576417
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,2,8,power_law_1.2,0.3742847919464111
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,2,8,power_law_1.2,0.537830400466919
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,2,8,power_law_1.2,0.6781311988830566
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,2,32,power_law_1.2,0.06340479850769043
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,2,32,power_law_1.2,0.06114559769630432
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,2,32,power_law_1.2,0.05999360084533691
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,2,32,power_law_1.2,0.06181120276451111
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,2,32,power_law_1.2,0.06099200248718262
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,2,32,power_law_1.2,0.058847999572753905
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,2,32,power_law_1.2,0.06175360083580017
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,2,32,power_law_1.2,0.062015998363494876
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,2,32,power_law_1.2,0.06151679754257202
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,2,32,power_law_1.2,0.06209279894828797
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,2,32,power_law_1.2,0.06420480012893677
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,2,32,power_law_1.2,0.06493440270423889
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,2,32,power_law_1.2,0.06471040248870849
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,2,32,power_law_1.2,0.0661952018737793
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,2,32,power_law_1.2,0.0707647979259491
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,2,32,power_law_1.2,0.06956160068511963
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,2,32,power_law_1.2,0.07319039702415467
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,2,32,power_law_1.2,0.08067200183868409
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,2,32,power_law_1.2,0.0883840024471283
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,2,32,power_law_1.2,0.10128639936447144
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,2,32,power_law_1.2,0.11934080123901367
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,2,32,power_law_1.2,0.15121279954910277
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,2,32,power_law_1.2,0.17244800329208373
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,2,32,power_law_1.2,0.23543040752410888
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,2,32,power_law_1.2,0.2946880102157593
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,2,32,power_law_1.2,0.42296319007873534
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,2,32,power_law_1.2,0.5310143947601318
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,8,16,power_law_1.2,0.04135040044784546
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,8,16,power_law_1.2,0.052300798892974856
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,8,16,power_law_1.2,0.04221439957618713
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,8,16,power_law_1.2,0.044537600874900815
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,8,16,power_law_1.2,0.04390400052070618
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,8,16,power_law_1.2,0.045311999320983884
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,8,16,power_law_1.2,0.04573439955711365
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,8,16,power_law_1.2,0.04712960124015808
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,8,16,power_law_1.2,0.047328001260757445
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,8,16,power_law_1.2,0.04767360091209412
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,8,16,power_law_1.2,0.04906879961490631
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,8,16,power_law_1.2,0.050220799446105954
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,8,16,power_law_1.2,0.05055999755859375
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,8,16,power_law_1.2,0.05356799960136414
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,8,16,power_law_1.2,0.05878400206565857
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,8,16,power_law_1.2,0.06228479743003845
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,8,16,power_law_1.2,0.06362239718437195
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,8,16,power_law_1.2,0.07047039866447449
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,8,16,power_law_1.2,0.07758079767227173
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,8,16,power_law_1.2,0.09151999950408936
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,8,16,power_law_1.2,0.10629119873046874
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,8,16,power_law_1.2,0.14295680522918702
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,8,16,power_law_1.2,0.18061439990997313
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,8,16,power_law_1.2,0.24387199878692628
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,8,16,power_law_1.2,0.32211201190948485
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,8,16,power_law_1.2,0.42892799377441404
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,8,16,power_law_1.2,0.6054912090301514
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,1,1,power_law_1.2,0.024716800451278685
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,1,1,power_law_1.2,0.037363201379776
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,1,1,power_law_1.2,0.05459200143814087
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,1,1,power_law_1.2,0.07992320060729981
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,1,1,power_law_1.2,0.10736639499664306
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,1,1,power_law_1.2,0.1416767954826355
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,1,1,power_law_1.2,0.19676799774169923
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,1,1,power_law_1.2,0.21148159503936767
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,1,1,power_law_1.2,0.2182528018951416
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,1,1,power_law_1.2,0.21879680156707765
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,1,1,power_law_1.2,0.22352640628814696
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,1,1,power_law_1.2,0.23778560161590576
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,1,1,power_law_1.2,0.24639360904693602
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,1,1,power_law_1.2,0.25759360790252683
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,1,1,power_law_1.2,0.27527680397033694
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,1,1,power_law_1.2,0.2821311950683594
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,1,1,power_law_1.2,0.2926079988479614
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,1,1,power_law_1.2,0.3326080083847046
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,1,1,power_law_1.2,0.3427776098251343
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,1,1,power_law_1.2,0.41943039894104006
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,1,1,power_law_1.2,0.43578238487243653
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,1,1,power_law_1.2,0.5495232105255127
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,1,1,power_law_1.2,0.5142208099365234
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,1,1,power_law_1.2,0.676358413696289
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,1,1,power_law_1.2,0.8280768394470215
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,1,1,power_law_1.2,1.1539903640747071
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,1,1,power_law_1.2,1.4849920272827148
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,1,4,power_law_1.2,0.07496320009231568
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,1,4,power_law_1.2,0.10568959712982177
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,1,4,power_law_1.2,0.11407999992370606
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,1,4,power_law_1.2,0.1364416003227234
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,1,4,power_law_1.2,0.18732800483703613
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,1,4,power_law_1.2,0.20332798957824708
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,1,4,power_law_1.2,0.25125761032104493
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,1,4,power_law_1.2,0.24712319374084474
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,1,4,power_law_1.2,0.25461759567260744
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,1,4,power_law_1.2,0.256166410446167
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,1,4,power_law_1.2,0.2746880054473877
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,1,4,power_law_1.2,0.27319040298461916
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,1,4,power_law_1.2,0.26319360733032227
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,1,4,power_law_1.2,0.268940806388855
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,1,4,power_law_1.2,0.2865664005279541
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,1,4,power_law_1.2,0.31132800579071046
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,1,4,power_law_1.2,0.3147007942199707
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,1,4,power_law_1.2,0.35333120822906494
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,1,4,power_law_1.2,0.35107200145721434
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,1,4,power_law_1.2,0.4103871822357178
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,1,4,power_law_1.2,0.4015039920806885
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,1,4,power_law_1.2,0.5163392066955567
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,1,4,power_law_1.2,0.5351935863494873
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,1,4,power_law_1.2,0.7364160060882569
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,1,4,power_law_1.2,0.7777535915374756
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,1,4,power_law_1.2,0.9939264297485352
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,1,4,power_law_1.2,1.3628224372863769
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,4,1,balanced,0.05000533163547516
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,4,1,balanced,0.059994667768478394
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,4,1,balanced,0.07501866420110066
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,4,1,balanced,0.11338133613268535
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,4,1,balanced,0.17734400431315103
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,4,1,balanced,0.30129067103068036
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,4,1,balanced,0.30236266056696576
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,4,1,balanced,0.30485333998998004
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,4,1,balanced,0.30874133110046387
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,4,1,balanced,0.30949334303538006
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,4,1,balanced,0.31064534187316895
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,4,1,balanced,0.31706666946411133
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,4,1,balanced,0.3168906569480896
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,4,1,balanced,0.3211466670036316
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,4,1,balanced,0.3320320049921672
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,4,1,balanced,0.33284799257914227
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,4,1,balanced,0.3418186505635579
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,4,1,balanced,0.3662399848302205
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,4,1,balanced,0.38224534193674725
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,4,1,balanced,0.4299466609954834
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,4,1,balanced,0.47129066785176593
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,4,1,balanced,0.5907946825027466
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,4,1,balanced,0.6498879988988241
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,4,1,balanced,0.8859519958496094
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,4,1,balanced,1.0418399969736736
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,4,1,balanced,1.4704373677571614
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,4,1,balanced,1.8572907447814941
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,2,1,power_law_1.01,0.020403200387954713
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,2,1,power_law_1.01,0.024128000438213348
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,2,1,power_law_1.01,0.036396801471710205
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,2,1,power_law_1.01,0.05188480019569397
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,2,1,power_law_1.01,0.06943359971046448
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,2,1,power_law_1.01,0.08817920088768005
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,2,1,power_law_1.01,0.11327999830245972
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,2,1,power_law_1.01,0.11755520105361938
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,2,1,power_law_1.01,0.12276480197906495
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,2,1,power_law_1.01,0.12558079957962037
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,2,1,power_law_1.01,0.130457603931427
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,2,1,power_law_1.01,0.13678719997406005
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,2,1,power_law_1.01,0.13587839603424073
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,2,1,power_law_1.01,0.14232319593429565
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,2,1,power_law_1.01,0.14231040477752685
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,2,1,power_law_1.01,0.15262080430984498
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,2,1,power_law_1.01,0.1613312005996704
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,2,1,power_law_1.01,0.18817919492721558
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,2,1,power_law_1.01,0.1834944009780884
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,2,1,power_law_1.01,0.23155839443206788
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,2,1,power_law_1.01,0.24111359119415282
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,2,1,power_law_1.01,0.31475200653076174
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,2,1,power_law_1.01,0.3028223991394043
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,2,1,power_law_1.01,0.3952575922012329
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,2,1,power_law_1.01,0.48893442153930666
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,2,1,power_law_1.01,0.6894080162048339
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,2,1,power_law_1.01,0.8740287780761719
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,4,4,balanced,0.06844266752401988
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,4,4,balanced,0.06821866830190022
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,4,4,balanced,0.0687360018491745
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,4,4,balanced,0.0763679991165797
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,4,4,balanced,0.09157333771387736
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,4,4,balanced,0.11150399843851726
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,4,4,balanced,0.11411199967066447
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,4,4,balanced,0.116757333278656
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,4,4,balanced,0.11270399888356526
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,4,4,balanced,0.11590400338172913
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,4,4,balanced,0.11556266744931538
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,4,4,balanced,0.1178559958934784
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,4,4,balanced,0.12117866675059001
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,4,4,balanced,0.11997866630554199
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,4,4,balanced,0.12398933370908101
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,4,4,balanced,0.12532800436019897
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,4,4,balanced,0.12915199995040894
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,4,4,balanced,0.13569066921869913
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,4,4,balanced,0.1444533367951711
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,4,4,balanced,0.1665386656920115
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,4,4,balanced,0.1800959904988607
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,4,4,balanced,0.21384533246358237
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,4,4,balanced,0.24958399931589761
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,4,4,balanced,0.3643733263015747
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,4,4,balanced,0.44569599628448486
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,4,4,balanced,0.6401439905166626
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,4,4,balanced,0.8254026571909586
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,2,128,balanced,0.04568000137805939
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,2,128,balanced,0.04594666759173075
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,2,128,balanced,0.045941332976023354
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,2,128,balanced,0.04806933303674062
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,2,128,balanced,0.05008533100287119
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,2,128,balanced,0.05169066786766052
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,2,128,balanced,0.053674668073654175
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,2,128,balanced,0.05385066568851471
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,2,128,balanced,0.05386666456858317
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,2,128,balanced,0.05259199937184652
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,2,128,balanced,0.054117331902186074
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,2,128,balanced,0.0562720000743866
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,2,128,balanced,0.05585066477457682
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,2,128,balanced,0.05603733162085215
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,2,128,balanced,0.059903999169667564
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,2,128,balanced,0.059877331058184304
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,2,128,balanced,0.06246933341026306
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,2,128,balanced,0.0680213322242101
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,2,128,balanced,0.07056533296902974
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,2,128,balanced,0.07857066889603932
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,2,128,balanced,0.08708266417185466
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,2,128,balanced,0.10107733805974324
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,2,128,balanced,0.11935999989509583
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,2,128,balanced,0.1575093368689219
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,2,128,balanced,0.19243733088175455
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,2,128,balanced,0.2623093326886495
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,2,128,balanced,0.32815466324488324
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,8,2,power_law_1.01,0.04424319863319397
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,8,2,power_law_1.01,0.04783360064029694
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,8,2,power_law_1.01,0.05246719717979431
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,8,2,power_law_1.01,0.0665727972984314
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,8,2,power_law_1.01,0.07663999795913697
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,8,2,power_law_1.01,0.08352640271186829
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,8,2,power_law_1.01,0.10372480154037475
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,8,2,power_law_1.01,0.10611840486526489
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,8,2,power_law_1.01,0.10804480314254761
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,8,2,power_law_1.01,0.10935039520263672
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,8,2,power_law_1.01,0.11210880279541016
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,8,2,power_law_1.01,0.11720319986343383
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,8,2,power_law_1.01,0.12038400173187255
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,8,2,power_law_1.01,0.12535040378570556
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,8,2,power_law_1.01,0.13338240385055541
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,8,2,power_law_1.01,0.1372480034828186
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,8,2,power_law_1.01,0.14016000032424927
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,8,2,power_law_1.01,0.1535040020942688
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,8,2,power_law_1.01,0.16933120489120485
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,8,2,power_law_1.01,0.20099198818206787
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,8,2,power_law_1.01,0.22978560924530028
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,8,2,power_law_1.01,0.29612159729003906
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,8,2,power_law_1.01,0.3370944023132324
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,8,2,power_law_1.01,0.45664639472961427
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,8,2,power_law_1.01,0.5836160182952881
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,8,2,power_law_1.01,0.784287977218628
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,8,2,power_law_1.01,1.0585599899291993
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,1,128,balanced,0.050016000866889954
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,1,128,balanced,0.050293331344922386
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,1,128,balanced,0.05017599960168203
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,1,128,balanced,0.051882664362589516
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,1,128,balanced,0.053957333167394005
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,1,128,balanced,0.058287998040517174
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,1,128,balanced,0.05829866727193197
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,1,128,balanced,0.06158400078614553
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,1,128,balanced,0.05889600018660227
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,1,128,balanced,0.05816533168156942
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,1,128,balanced,0.05979733169078827
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,1,128,balanced,0.05902933577696482
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,1,128,balanced,0.060175999999046326
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,1,128,balanced,0.06178133189678192
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,1,128,balanced,0.0647573322057724
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,1,128,balanced,0.06418666740258534
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,1,128,balanced,0.06631466746330261
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,1,128,balanced,0.0703413337469101
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,1,128,balanced,0.0739573339621226
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,1,128,balanced,0.08270399769147237
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,1,128,balanced,0.09128000338872273
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,1,128,balanced,0.1030453344186147
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,1,128,balanced,0.12151466806729634
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,1,128,balanced,0.1627840002377828
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,1,128,balanced,0.19685333967208862
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,1,128,balanced,0.2661919991175334
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,1,128,balanced,0.3345760107040405
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,2,16,balanced,0.04162666698296865
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,2,16,balanced,0.04381866753101349
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,2,16,balanced,0.0415040006240209
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,2,16,balanced,0.044112001856168113
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,2,16,balanced,0.045834665497144066
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,2,16,balanced,0.06396799782911937
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,2,16,balanced,0.06603200236956279
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,2,16,balanced,0.06498133142789204
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,2,16,balanced,0.06574399769306183
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,2,16,balanced,0.0664106657107671
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,2,16,balanced,0.06698133548100789
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,2,16,balanced,0.0662613312403361
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,2,16,balanced,0.06886933247248332
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,2,16,balanced,0.07045333087444305
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,2,16,balanced,0.07597866654396057
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,2,16,balanced,0.077824001510938
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,2,16,balanced,0.07938133180141449
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,2,16,balanced,0.08689066767692566
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,2,16,balanced,0.09240000446637471
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,2,16,balanced,0.10434666275978088
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,2,16,balanced,0.11784000198046367
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,2,16,balanced,0.14242666959762573
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,2,16,balanced,0.17312532663345337
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,2,16,balanced,0.2273013393084208
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,2,16,balanced,0.2645653287569682
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,2,16,balanced,0.3654613494873047
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,2,16,balanced,0.47066132227579754
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,8,16,power_law_1.2,0.05727360248565674
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,8,16,power_law_1.2,0.05991680026054382
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,8,16,power_law_1.2,0.0521664023399353
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,8,16,power_law_1.2,0.0516543984413147
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,8,16,power_law_1.2,0.05241600275039673
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,8,16,power_law_1.2,0.05246719717979431
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,8,16,power_law_1.2,0.053465598821640016
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,8,16,power_law_1.2,0.05363839864730835
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,8,16,power_law_1.2,0.05364480018615723
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,8,16,power_law_1.2,0.05377280116081238
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,8,16,power_law_1.2,0.05545600056648255
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,8,16,power_law_1.2,0.05618559718132019
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,8,16,power_law_1.2,0.05736960172653198
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,8,16,power_law_1.2,0.05862399935722351
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,8,16,power_law_1.2,0.0640064001083374
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,8,16,power_law_1.2,0.06542080044746398
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,8,16,power_law_1.2,0.06866559982299805
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,8,16,power_law_1.2,0.07969279885292054
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,8,16,power_law_1.2,0.08557440042495727
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,8,16,power_law_1.2,0.10095360279083251
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,8,16,power_law_1.2,0.11004799604415894
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,8,16,power_law_1.2,0.13545600175857545
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,8,16,power_law_1.2,0.17560960054397584
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,8,16,power_law_1.2,0.2395711898803711
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,8,16,power_law_1.2,0.31943678855895996
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,8,16,power_law_1.2,0.4474175930023193
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,8,16,power_law_1.2,0.5334208011627197
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,2,2,power_law_1.01,0.08794239759445191
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,2,2,power_law_1.01,0.10376960039138794
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,2,2,power_law_1.01,0.12019200325012207
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,2,2,power_law_1.01,0.13704320192337036
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,2,2,power_law_1.01,0.15480320453643798
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,2,2,power_law_1.01,0.16953599452972412
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,2,2,power_law_1.01,0.1958847999572754
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,2,2,power_law_1.01,0.2124351978302002
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,2,2,power_law_1.01,0.21902720928192138
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,2,2,power_law_1.01,0.21163520812988282
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,2,2,power_law_1.01,0.22444798946380615
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,2,2,power_law_1.01,0.22419838905334472
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,2,2,power_law_1.01,0.2269439935684204
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,2,2,power_law_1.01,0.23697280883789062
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,2,2,power_law_1.01,0.24401919841766356
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,2,2,power_law_1.01,0.2454911947250366
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,2,2,power_law_1.01,0.2514624118804932
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,2,2,power_law_1.01,0.2722111940383911
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,2,2,power_law_1.01,0.28952960968017577
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,2,2,power_law_1.01,0.3209151983261108
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,1,8,power_law_1.01,0.02505599856376648
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,2,2,power_law_1.01,0.36148478984832766
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,1,8,power_law_1.01,0.038387200236320494
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,2,2,power_law_1.01,0.4534592151641846
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,1,8,power_law_1.01,0.04254080057144165
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,1,8,power_law_1.01,0.04609920084476471
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,1,8,power_law_1.01,0.05300480127334595
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,1,8,power_law_1.01,0.049932798743247984
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,1,8,power_law_1.01,0.05220479965209961
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,1,8,power_law_1.01,0.05126399993896484
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,2,2,power_law_1.01,0.5540863990783691
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,1,8,power_law_1.01,0.06499199867248535
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,1,8,power_law_1.01,0.06483200192451477
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,2,2,power_law_1.01,0.7567935943603515
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,1,8,power_law_1.01,0.058387202024459836
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,1,8,power_law_1.01,0.08894079923629761
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,2,2,power_law_1.01,0.9333312034606933
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,1,8,power_law_1.01,0.0837823987007141
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,1,8,power_law_1.01,0.08218240141868591
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,2,2,power_law_1.01,1.3425279617309571
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,2,2,power_law_1.01,1.688422393798828
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,1,8,power_law_1.01,0.09959679841995239
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,1,8,power_law_1.01,0.1008255958557129
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,1,8,power_law_1.01,0.10431360006332398
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,1,8,power_law_1.01,0.13377920389175416
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,1,8,power_law_1.01,0.11700479984283448
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,1,8,power_law_1.01,0.11387519836425782
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,1,8,power_law_1.01,0.12147200107574463
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,1,8,power_law_1.01,0.1375040054321289
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,1,8,power_law_1.01,0.14654719829559326
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,1,8,power_law_1.01,0.20119040012359618
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,1,8,power_law_1.01,0.23505280017852784
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,1,8,power_law_1.01,0.31943678855895996
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,1,8,power_law_1.01,0.4084671974182129
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,1,64,power_law_1.2,0.04919680058956146
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,1,64,power_law_1.2,0.04449920058250427
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,1,64,power_law_1.2,0.04451839923858643
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,1,64,power_law_1.2,0.04429439902305603
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,1,64,power_law_1.2,0.04564479887485504
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,1,64,power_law_1.2,0.046112000942230225
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,1,64,power_law_1.2,0.0465472012758255
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,1,64,power_law_1.2,0.04745599925518036
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,1,64,power_law_1.2,0.04766719937324524
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,1,64,power_law_1.2,0.04815999865531921
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,1,64,power_law_1.2,0.04880639910697937
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,1,64,power_law_1.2,0.0499455988407135
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,1,64,power_law_1.2,0.05087360143661499
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,1,64,power_law_1.2,0.051846402883529666
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,1,64,power_law_1.2,0.0592960000038147
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,1,64,power_law_1.2,0.059411197900772095
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,1,64,power_law_1.2,0.06242560148239136
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,1,64,power_law_1.2,0.06962559819221496
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,1,64,power_law_1.2,0.07771520018577575
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,1,64,power_law_1.2,0.09125760197639465
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,1,64,power_law_1.2,0.10198400020599366
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,1,64,power_law_1.2,0.13278720378875733
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,1,64,power_law_1.2,0.16171519756317138
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,1,64,power_law_1.2,0.20784640312194824
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,1,64,power_law_1.2,0.26750080585479735
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,1,64,power_law_1.2,0.3699520111083984
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,1,64,power_law_1.2,0.5016767978668213
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,1,8,power_law_1.01,0.07440000176429748
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,1,8,power_law_1.01,0.09635840058326721
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,1,8,power_law_1.01,0.09666560292243957
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,1,8,power_law_1.01,0.12100479602813721
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,1,8,power_law_1.01,0.12181119918823242
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,1,8,power_law_1.01,0.12689919471740724
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,1,8,power_law_1.01,0.12974720001220702
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,1,8,power_law_1.01,0.12466559410095215
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,1,8,power_law_1.01,0.1253376007080078
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,1,8,power_law_1.01,0.13036160469055175
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,1,8,power_law_1.01,0.1308735966682434
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,1,8,power_law_1.01,0.13224960565567018
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,1,8,power_law_1.01,0.13351039886474608
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,1,8,power_law_1.01,0.1379520058631897
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,1,8,power_law_1.01,0.1399616003036499
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,1,8,power_law_1.01,0.14012800455093383
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,1,8,power_law_1.01,0.14391039609909057
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,1,8,power_law_1.01,0.16054400205612182
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,1,8,power_law_1.01,0.16395519971847533
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,1,8,power_law_1.01,0.18853759765625
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,1,8,power_law_1.01,0.21011199951171874
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,1,8,power_law_1.01,0.2600320100784302
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,1,8,power_law_1.01,0.3065407991409302
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,1,8,power_law_1.01,0.430131196975708
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,1,8,power_law_1.01,0.5527040004730225
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,1,8,power_law_1.01,0.7184768199920655
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,1,8,power_law_1.01,0.9921600341796875
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,32,2,power_law_1.2,0.04317440092563629
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,32,2,power_law_1.2,0.04560000002384186
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,32,2,power_law_1.2,0.045664000511169436
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,32,2,power_law_1.2,0.05128960013389587
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,32,2,power_law_1.2,0.05548160076141358
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,32,2,power_law_1.2,0.06370559930801392
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,32,2,power_law_1.2,0.070169597864151
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,32,2,power_law_1.2,0.07385600209236146
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,32,2,power_law_1.2,0.07534719705581665
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,32,2,power_law_1.2,0.07525119781494141
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,32,2,power_law_1.2,0.07552639842033386
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,32,2,power_law_1.2,0.0803391993045807
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,32,2,power_law_1.2,0.08536959886550903
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,32,2,power_law_1.2,0.08791040182113648
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,32,2,power_law_1.2,0.09364479780197144
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,32,2,power_law_1.2,0.09537280201911927
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,32,2,power_law_1.2,0.10660480260848999
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,32,2,power_law_1.2,0.12412799596786499
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,32,2,power_law_1.2,0.1322111964225769
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,32,2,power_law_1.2,0.1709439992904663
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,32,2,power_law_1.2,0.18995200395584105
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,32,2,power_law_1.2,0.2504767894744873
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,32,2,power_law_1.2,0.3116224050521851
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,32,2,power_law_1.2,0.41432957649230956
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,32,2,power_law_1.2,0.510361623764038
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,32,2,power_law_1.2,0.7016255855560303
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,16,1,power_law_1.01,0.050380802154541014
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,32,2,power_law_1.2,0.9047360420227051
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,16,1,power_law_1.01,0.052876800298690796
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,16,1,power_law_1.01,0.05484799742698669
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,16,1,power_law_1.01,0.06606720089912414
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,16,1,power_law_1.01,0.07813760042190551
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,16,1,power_law_1.01,0.08618239760398864
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,16,1,power_law_1.01,0.10156799554824829
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,16,1,power_law_1.01,0.1055232048034668
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,16,1,power_law_1.01,0.10754560232162476
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,16,1,power_law_1.01,0.11195520162582398
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,16,1,power_law_1.01,0.11508480310440064
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,16,1,power_law_1.01,0.12300800085067749
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,16,1,power_law_1.01,0.1297727942466736
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,16,1,power_law_1.01,0.13533439636230468
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,16,1,power_law_1.01,0.1462399959564209
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,16,1,power_law_1.01,0.15500799417495728
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,16,1,power_law_1.01,0.1553536057472229
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,16,1,power_law_1.01,0.18798719644546508
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,16,1,power_law_1.01,0.20492799282073976
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,16,1,power_law_1.01,0.26496639251708987
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,16,1,power_law_1.01,0.32102398872375487
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,16,1,power_law_1.01,0.42739200592041016
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,16,1,power_law_1.01,0.5295680046081543
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,16,1,power_law_1.01,0.737830400466919
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,16,1,power_law_1.01,0.9466879844665528
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,16,1,power_law_1.01,1.3664640426635741
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,16,1,power_law_1.01,1.7767936706542968
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,1,256,power_law_1.01,0.04620800018310547
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,1,256,power_law_1.01,0.04717440009117126
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,1,256,power_law_1.01,0.04657280147075653
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,1,256,power_law_1.01,0.04938879907131195
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,1,256,power_law_1.01,0.04968959987163544
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,1,256,power_law_1.01,0.05050240159034729
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,1,256,power_law_1.01,0.05121279954910278
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,1,256,power_law_1.01,0.051123201847076416
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,1,256,power_law_1.01,0.0517632007598877
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,1,256,power_law_1.01,0.05130879878997803
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,1,256,power_law_1.01,0.05245440006256104
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,1,256,power_law_1.01,0.05306879878044128
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,1,256,power_law_1.01,0.05306879878044128
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,1,256,power_law_1.01,0.05330560207366943
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,1,256,power_law_1.01,0.05763840079307556
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,1,256,power_law_1.01,0.0572160005569458
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,1,256,power_law_1.01,0.05913599729537964
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,1,256,power_law_1.01,0.06673280000686646
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,1,256,power_law_1.01,0.06938880085945129
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,1,256,power_law_1.01,0.08072320222854615
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,1,256,power_law_1.01,0.09165440201759338
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,1,256,power_law_1.01,0.11208319664001465
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,1,256,power_law_1.01,0.13155200481414794
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,1,256,power_law_1.01,0.1757248044013977
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,1,256,power_law_1.01,0.221779203414917
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,1,256,power_law_1.01,0.30843520164489746
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,1,256,power_law_1.01,0.3898943901062012
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,1,64,balanced,0.02346666653951009
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,1,64,balanced,0.023503998915354412
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,1,64,balanced,0.022885332504908245
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,1,64,balanced,0.02317333221435547
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,1,64,balanced,0.025274666647116344
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,1,64,balanced,0.027242665489514668
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,1,64,balanced,0.03941866755485535
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,1,64,balanced,0.03982399900754293
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,1,64,balanced,0.0432533323764801
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,1,64,balanced,0.042352000872294106
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,1,64,balanced,0.04190933207670847
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,1,64,balanced,0.039781334499518074
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,1,64,balanced,0.03551466763019562
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,1,64,balanced,0.03348266581694285
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,1,64,balanced,0.051856001218159996
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,1,64,balanced,0.04913066824277242
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,1,64,balanced,0.04302933315436045
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,1,64,balanced,0.045754666129748024
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,1,64,balanced,0.05587733288606008
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,1,64,balanced,0.07090666890144348
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,1,64,balanced,0.08807999889055888
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,1,64,balanced,0.11327999830245972
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,1,64,balanced,0.1430293321609497
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,1,64,balanced,0.19967466592788696
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,1,64,balanced,0.25647467374801636
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,1,64,balanced,0.37147200107574463
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,1,64,balanced,0.4873066743214925
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,2,64,power_law_1.2,0.05794559717178345
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,2,64,power_law_1.2,0.044844800233840944
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,2,64,power_law_1.2,0.044409599900245664
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,2,64,power_law_1.2,0.0473471999168396
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,2,64,power_law_1.2,0.04700160026550293
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,2,64,power_law_1.2,0.04801279902458191
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,2,64,power_law_1.2,0.05004159808158874
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,2,64,power_law_1.2,0.04932479858398438
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,2,64,power_law_1.2,0.04906879961490631
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,2,64,power_law_1.2,0.049081599712371825
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,2,64,power_law_1.2,0.05026559829711914
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,2,64,power_law_1.2,0.05135999917984009
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,2,64,power_law_1.2,0.051532799005508424
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,2,64,power_law_1.2,0.05325440168380737
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,2,64,power_law_1.2,0.05744640231132507
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,2,64,power_law_1.2,0.05788800120353699
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,2,64,power_law_1.2,0.0606656014919281
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,2,64,power_law_1.2,0.06849279999732971
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,2,64,power_law_1.2,0.07472000122070313
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,2,64,power_law_1.2,0.08673279881477355
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,2,64,power_law_1.2,0.09529600143432618
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,2,64,power_law_1.2,0.12583680152893068
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,2,64,power_law_1.2,0.1397312045097351
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,2,64,power_law_1.2,0.18764159679412842
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,2,64,power_law_1.2,0.24225919246673583
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,2,64,power_law_1.2,0.33352320194244384
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,2,64,power_law_1.2,0.4465663909912109
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,1,32,power_law_1.2,0.05845119953155518
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,1,32,power_law_1.2,0.052019202709198
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,1,32,power_law_1.2,0.05012480020523071
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,1,32,power_law_1.2,0.05303040146827698
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,1,32,power_law_1.2,0.051744002103805545
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,1,32,power_law_1.2,0.053376001119613645
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,1,32,power_law_1.2,0.053401601314544675
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,1,32,power_law_1.2,0.05604479908943176
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,1,32,power_law_1.2,0.05373439788818359
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,1,32,power_law_1.2,0.05559039711952209
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,1,32,power_law_1.2,0.055801600217819214
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,1,32,power_law_1.2,0.05667200088500977
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,1,32,power_law_1.2,0.05917440056800842
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,1,32,power_law_1.2,0.059552001953125
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,1,32,power_law_1.2,0.06407039761543273
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,1,32,power_law_1.2,0.06821119785308838
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,1,32,power_law_1.2,0.07227519750595093
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,1,32,power_law_1.2,0.07916160225868225
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,1,32,power_law_1.2,0.08988800048828124
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,1,32,power_law_1.2,0.1059391975402832
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,1,32,power_law_1.2,0.12914559841156006
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,1,32,power_law_1.2,0.1602944016456604
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,1,32,power_law_1.2,0.18598400354385375
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,1,32,power_law_1.2,0.24512639045715331
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,1,32,power_law_1.2,0.3057663917541504
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,1,32,power_law_1.2,0.4820223808288574
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,1,32,power_law_1.2,0.5803391933441162
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,2,1,power_law_1.2,0.12844799757003783
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,2,1,power_law_1.2,0.14513920545578002
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,2,1,power_law_1.2,0.17311999797821045
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,2,1,power_law_1.2,0.20020480155944825
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,2,1,power_law_1.2,0.22818560600280763
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,2,1,power_law_1.2,0.2643455982208252
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,2,1,power_law_1.2,0.33123838901519775
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,2,1,power_law_1.2,0.3510591983795166
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,2,1,power_law_1.2,0.35907199382781985
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,2,1,power_law_1.2,0.3640959978103638
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,2,1,power_law_1.2,0.37134718894958496
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,2,1,power_law_1.2,0.384499192237854
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,2,1,power_law_1.2,0.39742720127105713
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,2,1,power_law_1.2,0.4165503978729248
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,2,1,power_law_1.2,0.41034879684448244
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,2,1,power_law_1.2,0.42444801330566406
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,2,1,power_law_1.2,0.4393472194671631
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,2,1,power_law_1.2,0.4818751811981201
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,2,1,power_law_1.2,0.5120255947113037
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,2,1,power_law_1.2,0.5879936218261719
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,2,1,power_law_1.2,0.6798399925231934
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,2,1,power_law_1.2,0.8041664123535156
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,2,1,power_law_1.2,0.967039966583252
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,2,1,power_law_1.2,1.2835519790649415
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,2,1,power_law_1.2,1.596332836151123
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,2,1,power_law_1.2,2.2808063507080076
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,2,1,power_law_1.2,2.952351951599121
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,16,2,balanced,0.04370133578777313
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,16,2,balanced,0.044069334864616394
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,16,2,balanced,0.047781333327293396
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,16,2,balanced,0.05022933085759481
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,16,2,balanced,0.06651199857393901
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,16,2,balanced,0.09572266538937886
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,16,2,balanced,0.09699733058611552
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,16,2,balanced,0.09771733482678731
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,16,2,balanced,0.09852266311645508
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,16,2,balanced,0.09894933303197224
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,16,2,balanced,0.09726933638254802
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,16,2,balanced,0.09820800026257832
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,16,2,balanced,0.10128000378608704
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,16,2,balanced,0.10335999727249146
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,16,2,balanced,0.11053333679835002
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,16,2,balanced,0.11142933368682861
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,16,2,balanced,0.1163146694501241
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,16,2,balanced,0.12905066212018332
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,16,2,balanced,0.14076800147692362
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,16,2,balanced,0.16054933269818625
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,16,2,balanced,0.17907200256983438
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,16,2,balanced,0.25726399819056195
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,16,2,balanced,0.291701336701711
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,16,2,balanced,0.42326398690541583
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,16,2,balanced,0.5197973251342773
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,16,2,balanced,0.7657600243886312
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,16,2,balanced,0.9966293176015218
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,16,8,balanced,0.03953066716591517
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,16,8,balanced,0.03982933362325033
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,16,8,balanced,0.04022933294375738
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,16,8,balanced,0.04195199906826019
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,16,8,balanced,0.04584000011285146
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,16,8,balanced,0.05008533100287119
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,16,8,balanced,0.04997866849104563
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,16,8,balanced,0.0517493337392807
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,16,8,balanced,0.05422399938106537
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,16,8,balanced,0.0517546683549881
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,16,8,balanced,0.05235733091831207
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,16,8,balanced,0.053957333167394005
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,16,8,balanced,0.05410666763782501
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,16,8,balanced,0.052154665191968284
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,16,8,balanced,0.05983999868233999
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,16,8,balanced,0.058186665177345276
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,16,8,balanced,0.06061333417892456
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,16,8,balanced,0.0687360018491745
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,16,8,balanced,0.07252266506354015
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,16,8,balanced,0.08470933636029561
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,16,8,balanced,0.09707199533780415
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,16,8,balanced,0.12426666418711345
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,16,8,balanced,0.14418133099873862
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,16,8,balanced,0.18949333826700845
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,16,8,balanced,0.23973333835601807
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,16,8,balanced,0.3367413282394409
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,16,8,balanced,0.40831998984018963
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,2,128,power_law_1.2,0.047968000173568726
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,2,128,power_law_1.2,0.04818559885025024
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,2,128,power_law_1.2,0.04693120121955872
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,2,128,power_law_1.2,0.049158400297164916
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,2,128,power_law_1.2,0.049420800805091855
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,2,128,power_law_1.2,0.050316798686981204
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,2,128,power_law_1.2,0.05106559991836548
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,2,128,power_law_1.2,0.051052802801132204
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,2,128,power_law_1.2,0.0523967981338501
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,2,128,power_law_1.2,0.05178239941596985
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,2,128,power_law_1.2,0.05279359817504883
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,2,128,power_law_1.2,0.053414398431777955
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,2,128,power_law_1.2,0.054201602935791016
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,2,128,power_law_1.2,0.054604798555374146
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,2,128,power_law_1.2,0.0590399980545044
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,2,128,power_law_1.2,0.05920640230178833
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,2,128,power_law_1.2,0.06217600107192993
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,2,128,power_law_1.2,0.0685696005821228
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,2,128,power_law_1.2,0.07375360131263733
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,2,128,power_law_1.2,0.08495360016822814
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,2,128,power_law_1.2,0.0976256012916565
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,2,128,power_law_1.2,0.11831040382385254
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,2,128,power_law_1.2,0.1411072015762329
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,2,128,power_law_1.2,0.19043200016021727
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,2,128,power_law_1.2,0.2466048002243042
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,2,128,power_law_1.2,0.35143680572509767
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,2,128,power_law_1.2,0.46053118705749513
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,4,16,power_law_1.2,0.04816640019416809
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,4,16,power_law_1.2,0.06522240042686463
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,4,16,power_law_1.2,0.047814399003982544
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,4,16,power_law_1.2,0.048742398619651794
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,4,16,power_law_1.2,0.050380802154541014
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,4,16,power_law_1.2,0.047295999526977536
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,4,16,power_law_1.2,0.050860798358917235
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,4,16,power_law_1.2,0.050457602739334105
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,4,16,power_law_1.2,0.05103359818458557
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,4,16,power_law_1.2,0.05090559720993042
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,4,16,power_law_1.2,0.05123199820518494
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,4,16,power_law_1.2,0.05381760001182556
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,4,16,power_law_1.2,0.054995197057724
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,4,16,power_law_1.2,0.05864319801330566
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,4,16,power_law_1.2,0.06350719928741455
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,4,16,power_law_1.2,0.06669440269470214
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,4,16,power_law_1.2,0.0659775972366333
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,4,16,power_law_1.2,0.07534719705581665
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,4,16,power_law_1.2,0.08292480111122132
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,4,16,power_law_1.2,0.09982720017433167
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,4,16,power_law_1.2,0.11772160530090332
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,4,16,power_law_1.2,0.15241600275039674
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,4,16,power_law_1.2,0.2052544116973877
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,4,16,power_law_1.2,0.25464959144592286
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,4,16,power_law_1.2,0.32020480632781984
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,4,16,power_law_1.2,0.4674816131591797
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,4,16,power_law_1.2,0.6120895862579345
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,8,32,power_law_1.2,0.04897280037403107
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,8,32,power_law_1.2,0.04792959988117218
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,8,32,power_law_1.2,0.04449920058250427
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,8,32,power_law_1.2,0.046911999583244324
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,8,32,power_law_1.2,0.04707199931144714
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,8,32,power_law_1.2,0.047200000286102294
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,8,32,power_law_1.2,0.04799999892711639
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,8,32,power_law_1.2,0.049132800102233885
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,8,32,power_law_1.2,0.0486272007226944
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,8,32,power_law_1.2,0.05000960230827332
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,8,32,power_law_1.2,0.05030400156974792
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,8,32,power_law_1.2,0.050892800092697144
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,8,32,power_law_1.2,0.05121920108795166
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,8,32,power_law_1.2,0.053939199447631835
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,8,32,power_law_1.2,0.05836799740791321
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,8,32,power_law_1.2,0.06028159856796265
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,8,32,power_law_1.2,0.0635968029499054
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,8,32,power_law_1.2,0.06951040029525757
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,8,32,power_law_1.2,0.0756608009338379
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,8,32,power_law_1.2,0.08775039911270141
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,8,32,power_law_1.2,0.09807999730110169
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,8,32,power_law_1.2,0.13069440126419068
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,8,32,power_law_1.2,0.15139199495315553
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,8,32,power_law_1.2,0.20752639770507814
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,8,32,power_law_1.2,0.260479998588562
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,8,32,power_law_1.2,0.3648000001907349
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,8,32,power_law_1.2,0.4611968040466309
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,16,16,power_law_1.2,0.040940800309181215
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,16,16,power_law_1.2,0.046700799465179445
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,16,16,power_law_1.2,0.03911679983139038
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,16,16,power_law_1.2,0.04159359931945801
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,16,16,power_law_1.2,0.04270719885826111
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,16,16,power_law_1.2,0.04307839870452881
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,16,16,power_law_1.2,0.04286719858646393
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,16,16,power_law_1.2,0.043731200695037845
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,16,16,power_law_1.2,0.04426240026950836
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,16,16,power_law_1.2,0.04483200013637543
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,16,16,power_law_1.2,0.04510720074176788
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,16,16,power_law_1.2,0.04643200039863586
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,16,16,power_law_1.2,0.047116801142692566
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,16,16,power_law_1.2,0.04792959988117218
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,16,16,power_law_1.2,0.053311997652053834
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,16,16,power_law_1.2,0.053932797908782956
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,16,16,power_law_1.2,0.05829120278358459
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,16,16,power_law_1.2,0.06446719765663148
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,16,16,power_law_1.2,0.07083520293235779
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,16,16,power_law_1.2,0.08300799727439881
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,16,16,power_law_1.2,0.09430400133132935
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,16,16,power_law_1.2,0.11318399906158447
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,16,16,power_law_1.2,0.15003520250320435
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,16,16,power_law_1.2,0.2031167984008789
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,16,16,power_law_1.2,0.23994879722595214
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,16,16,power_law_1.2,0.3594304084777832
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,16,16,power_law_1.2,0.4507584095001221
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,2,64,power_law_1.01,0.05969280004501343
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,2,64,power_law_1.01,0.04284160137176514
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,2,64,power_law_1.01,0.04505600035190582
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,2,64,power_law_1.01,0.047116801142692566
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,2,64,power_law_1.01,0.04576640129089356
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,2,64,power_law_1.01,0.047167998552322385
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,2,64,power_law_1.01,0.04755200147628784
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,2,64,power_law_1.01,0.048556798696517946
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,2,64,power_law_1.01,0.047993600368499756
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,2,64,power_law_1.01,0.04821119904518127
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,2,64,power_law_1.01,0.052127999067306516
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,2,64,power_law_1.01,0.05408639907836914
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,2,64,power_law_1.01,0.05421440005302429
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,2,64,power_law_1.01,0.05613440275192261
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,2,64,power_law_1.01,0.06077439785003662
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,2,64,power_law_1.01,0.06180480122566223
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,2,64,power_law_1.01,0.06104959845542908
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,2,64,power_law_1.01,0.06606720089912414
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,2,64,power_law_1.01,0.06910719871520996
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,2,64,power_law_1.01,0.0856000006198883
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,2,64,power_law_1.01,0.09329919815063477
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,2,64,power_law_1.01,0.11398400068283081
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,2,64,power_law_1.01,0.1338688015937805
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,2,64,power_law_1.01,0.1784000039100647
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,2,64,power_law_1.01,0.2075968027114868
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,2,64,power_law_1.01,0.2825344085693359
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,2,64,power_law_1.01,0.3623231887817383
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,32,8,balanced,0.039594667653242745
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,32,8,balanced,0.04381866753101349
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,32,8,balanced,0.04173333446184794
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,32,8,balanced,0.04174399872620901
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,32,8,balanced,0.043605332573254905
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,32,8,balanced,0.048197334011395775
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,32,8,balanced,0.047770669062932335
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,32,8,balanced,0.048058668772379555
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,32,8,balanced,0.0497920016447703
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,32,8,balanced,0.04836266736189524
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,32,8,balanced,0.049957334995269775
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,32,8,balanced,0.05147733290990194
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,32,8,balanced,0.05180266499519348
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,32,8,balanced,0.05027199784914652
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,32,8,balanced,0.05604266623655955
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,32,8,balanced,0.05600533386071523
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,32,8,balanced,0.05796800057093302
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,32,8,balanced,0.06412800153096516
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,32,8,balanced,0.07028266787528992
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,32,8,balanced,0.07901866734027863
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,32,8,balanced,0.08843732873598735
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,32,8,balanced,0.11742400129636128
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,32,8,balanced,0.1386613349119822
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,32,8,balanced,0.19110933939615884
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,32,8,balanced,0.2653440038363139
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,32,8,balanced,0.3853706518809001
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,32,8,balanced,0.4580746491750081
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,2,4,power_law_1.01,0.06602240204811097
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,2,4,power_law_1.01,0.07887359857559204
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,2,4,power_law_1.01,0.08869760036468506
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,2,4,power_law_1.01,0.10448640584945679
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,2,4,power_law_1.01,0.11449600458145141
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,2,4,power_law_1.01,0.1299008011817932
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,2,4,power_law_1.01,0.13841919898986815
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,2,4,power_law_1.01,0.1350208044052124
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,2,4,power_law_1.01,0.13765759468078614
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,2,4,power_law_1.01,0.14206720590591432
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,2,4,power_law_1.01,0.13404159545898436
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,2,4,power_law_1.01,0.14163199663162232
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,2,4,power_law_1.01,0.13630720376968383
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,2,4,power_law_1.01,0.14387840032577515
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,2,4,power_law_1.01,0.1516095995903015
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,2,4,power_law_1.01,0.15162880420684816
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,2,4,power_law_1.01,0.15653120279312133
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,2,4,power_law_1.01,0.16891520023345946
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,2,4,power_law_1.01,0.1844928026199341
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,2,4,power_law_1.01,0.20873599052429198
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,2,4,power_law_1.01,0.2453887939453125
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,2,4,power_law_1.01,0.29570560455322265
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,2,4,power_law_1.01,0.34968318939208987
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,2,4,power_law_1.01,0.4630271911621094
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,2,4,power_law_1.01,0.5540736198425293
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,2,4,power_law_1.01,0.8173760414123535
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,2,4,power_law_1.01,1.047225570678711
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,1,128,power_law_1.01,0.05383679866790771
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,1,128,power_law_1.01,0.04940800070762634
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,1,128,power_law_1.01,0.05377920269966126
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,1,128,power_law_1.01,0.05243520140647888
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,1,128,power_law_1.01,0.0542527973651886
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,1,128,power_law_1.01,0.05137919783592224
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,1,128,power_law_1.01,0.05185279846191406
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,1,128,power_law_1.01,0.054118400812149046
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,1,128,power_law_1.01,0.053855997323989865
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,1,128,power_law_1.01,0.05398399829864502
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,1,128,power_law_1.01,0.0546239972114563
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,1,128,power_law_1.01,0.05533440113067627
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,1,128,power_law_1.01,0.05676800012588501
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,1,128,power_law_1.01,0.05729920268058777
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,1,128,power_law_1.01,0.060159999132156375
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,1,128,power_law_1.01,0.05999360084533691
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,1,128,power_law_1.01,0.06250879764556885
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,1,128,power_law_1.01,0.06753919720649719
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,1,128,power_law_1.01,0.07297279834747314
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,1,128,power_law_1.01,0.08577920198440551
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,1,128,power_law_1.01,0.09983360171318054
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,1,128,power_law_1.01,0.13100800514221192
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,1,128,power_law_1.01,0.1592576026916504
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,1,128,power_law_1.01,0.21395199298858641
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,1,128,power_law_1.01,0.2633471965789795
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,1,128,power_law_1.01,0.3862720012664795
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,1,128,power_law_1.01,0.49585280418395994
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,2,4,power_law_1.2,0.06519039869308471
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,2,4,power_law_1.2,0.077292799949646
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,2,4,power_law_1.2,0.08010879755020142
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,2,4,power_law_1.2,0.09843840003013611
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,2,4,power_law_1.2,0.11139199733734131
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,2,4,power_law_1.2,0.13516160249710082
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,2,4,power_law_1.2,0.13950079679489136
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,2,4,power_law_1.2,0.13957120180130006
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,2,4,power_law_1.2,0.14666240215301513
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,2,4,power_law_1.2,0.14069759845733643
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,2,4,power_law_1.2,0.13381760120391845
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,2,4,power_law_1.2,0.1451200008392334
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,2,4,power_law_1.2,0.14282239675521852
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,2,4,power_law_1.2,0.1462272047996521
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,2,4,power_law_1.2,0.155404794216156
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,2,4,power_law_1.2,0.1636415958404541
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,2,4,power_law_1.2,0.16437760591506959
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,2,4,power_law_1.2,0.1732480049133301
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,2,4,power_law_1.2,0.1945855975151062
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,2,4,power_law_1.2,0.22633600234985352
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,2,4,power_law_1.2,0.25303680896759034
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,2,4,power_law_1.2,0.3050240039825439
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,2,4,power_law_1.2,0.35890560150146483
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,2,4,power_law_1.2,0.4805439949035645
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,2,4,power_law_1.2,0.6584703922271729
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,2,4,power_law_1.2,0.8629568099975586
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,2,4,power_law_1.2,1.1085311889648437
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,1,2,balanced,0.025226667523384094
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,1,2,balanced,0.02869333326816559
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,1,2,balanced,0.04423466821511587
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,1,2,balanced,0.060831998785336815
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,1,2,balanced,0.09504533807436626
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,1,2,balanced,0.16261866688728333
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,1,2,balanced,0.16801599661509195
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,1,2,balanced,0.1697546641031901
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,1,2,balanced,0.1731733282407125
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,1,2,balanced,0.17690134048461914
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,1,2,balanced,0.17826666434605917
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,1,2,balanced,0.1832053263982137
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,1,2,balanced,0.18952532609303793
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,1,2,balanced,0.19088000059127808
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,1,2,balanced,0.19960000117619833
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,1,2,balanced,0.20837332804997763
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,1,2,balanced,0.24207466840744019
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,1,2,balanced,0.2587626576423645
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,1,2,balanced,0.24075732628504434
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,1,2,balanced,0.3208906650543213
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,1,2,balanced,0.2696160078048706
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,1,2,balanced,0.447706659634908
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,1,2,balanced,0.5616480112075806
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,1,2,balanced,0.5064800182978312
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,1,2,balanced,0.4901119867960612
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,1,2,balanced,0.9646613597869873
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,1,2,balanced,0.8869813283284506
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,2,16,power_law_1.2,0.054553598165512085
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,2,16,power_law_1.2,0.05449600219726562
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,2,16,power_law_1.2,0.05122560262680054
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,2,16,power_law_1.2,0.05560960173606873
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,2,16,power_law_1.2,0.055353599786758426
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,2,16,power_law_1.2,0.05475199818611145
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,2,16,power_law_1.2,0.05541759729385376
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,2,16,power_law_1.2,0.057625597715377806
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,2,16,power_law_1.2,0.05508480072021484
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,2,16,power_law_1.2,0.05607680082321167
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,2,16,power_law_1.2,0.058303999900817874
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,2,16,power_law_1.2,0.05842559933662415
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,16,1,power_law_1.2,0.05262079834938049
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,16,1,power_law_1.2,0.054611200094223024
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,16,1,power_law_1.2,0.05899519920349121
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,16,1,power_law_1.2,0.06849920153617858
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,16,1,power_law_1.2,0.08177279829978942
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,2,16,power_law_1.2,0.06047359704971313
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,16,1,power_law_1.2,0.0948415994644165
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,2,16,power_law_1.2,0.062483197450637816
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,2,16,power_law_1.2,0.0693120002746582
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,2,16,power_law_1.2,0.07066879868507385
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,2,16,power_law_1.2,0.07621759772300721
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,2,16,power_law_1.2,0.08376320004463196
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,2,16,power_law_1.2,0.0946175992488861
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,2,16,power_law_1.2,0.11344640254974366
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,16,1,power_law_1.2,0.11390080451965331
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,2,16,power_law_1.2,0.1277567982673645
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,16,1,power_law_1.2,0.1213312029838562
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,2,16,power_law_1.2,0.16416000127792357
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,16,1,power_law_1.2,0.12216960191726685
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,2,16,power_law_1.2,0.19191039800643922
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,16,1,power_law_1.2,0.12714879512786864
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,2,16,power_law_1.2,0.2446592092514038
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,16,1,power_law_1.2,0.1314239978790283
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,2,16,power_law_1.2,0.3015615940093994
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,16,1,power_law_1.2,0.1379520058631897
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,2,16,power_law_1.2,0.5034175872802734
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,16,1,power_law_1.2,0.1397503972053528
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,2,16,power_law_1.2,0.6022079944610595
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,16,1,power_law_1.2,0.14395519495010375
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,16,1,power_law_1.2,0.14953600168228148
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,16,1,power_law_1.2,0.1583359956741333
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,16,1,power_law_1.2,0.18421119451522827
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,16,1,power_law_1.2,0.20906240940093995
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,16,1,power_law_1.2,0.2319488048553467
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,16,1,power_law_1.2,0.2821311950683594
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,16,1,power_law_1.2,0.3197695970535278
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,16,1,power_law_1.2,0.4200128078460693
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,16,1,power_law_1.2,0.5174272060394287
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,16,1,power_law_1.2,0.6964479923248291
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,16,1,power_law_1.2,0.8908224105834961
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,16,1,power_law_1.2,1.2587136268615722
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,16,1,power_law_1.2,1.6382144927978515
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,16,2,power_law_1.01,0.04639999866485596
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,16,2,power_law_1.01,0.04663679897785187
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,16,2,power_law_1.01,0.04835200011730194
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,16,2,power_law_1.01,0.0590399980545044
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,16,2,power_law_1.01,0.06659839749336242
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,16,2,power_law_1.01,0.07447680234909057
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,16,2,power_law_1.01,0.0825984001159668
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,16,2,power_law_1.01,0.08919039964675904
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,16,2,power_law_1.01,0.08772479891777038
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,16,2,power_law_1.01,0.08947839736938476
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,16,2,power_law_1.01,0.09102079868316651
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,16,2,power_law_1.01,0.09602559804916382
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,16,2,power_law_1.01,0.09565439820289612
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,16,2,power_law_1.01,0.09987199902534485
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,16,2,power_law_1.01,0.1079103946685791
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,16,2,power_law_1.01,0.11048959493637085
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,16,2,power_law_1.01,0.12620160579681397
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,16,2,power_law_1.01,0.14816639423370362
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,16,2,power_law_1.01,0.1553663969039917
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,16,2,power_law_1.01,0.18209919929504395
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,16,2,power_law_1.01,0.21206400394439698
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,16,2,power_law_1.01,0.2643712043762207
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,16,2,power_law_1.01,0.3262847900390625
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,16,2,power_law_1.01,0.44381442070007326
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,16,2,power_law_1.01,0.5659327983856202
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,16,2,power_law_1.01,0.8401151657104492
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,16,2,power_law_1.01,1.094598388671875
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,2,32,balanced,0.04011200120051702
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,2,32,balanced,0.04167466859022776
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,2,32,balanced,0.039936001102129616
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,2,32,balanced,0.04351999859015147
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,2,32,balanced,0.043621331453323364
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,2,32,balanced,0.06402666866779327
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,2,32,balanced,0.06247999767462412
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,2,32,balanced,0.06406400104363759
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,2,32,balanced,0.06453333298365276
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,2,32,balanced,0.06401066482067108
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,2,32,balanced,0.06399466594060262
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,2,32,balanced,0.06608533362547557
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,2,32,balanced,0.06617600222428639
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,2,32,balanced,0.06692266464233398
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,2,32,balanced,0.07045866549015045
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,2,32,balanced,0.07260266443093617
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,2,32,balanced,0.0707946668068568
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,2,32,balanced,0.07708266874154408
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,2,32,balanced,0.08088533580303192
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,2,32,balanced,0.08868267138799031
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,2,32,balanced,0.09503466884295146
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,2,32,balanced,0.11069867014884949
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,2,32,balanced,0.123690664768219
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,2,32,balanced,0.15897599856058756
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,2,32,balanced,0.18736533323923746
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,2,32,balanced,0.24494399627049765
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,2,32,balanced,0.31382399797439575
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,16,16,power_law_1.01,0.04509440064430237
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,16,16,power_law_1.01,0.05180799961090088
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,16,16,power_law_1.01,0.04284160137176514
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,16,16,power_law_1.01,0.045587199926376346
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,16,16,power_law_1.01,0.04490880072116852
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,16,16,power_law_1.01,0.0448063999414444
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,16,16,power_law_1.01,0.04570879936218262
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,16,16,power_law_1.01,0.0458624005317688
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,16,16,power_law_1.01,0.04645760059356689
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,16,16,power_law_1.01,0.04648320078849792
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,16,16,power_law_1.01,0.04692479968070984
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,16,16,power_law_1.01,0.04821119904518127
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,16,16,power_law_1.01,0.048556798696517946
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,16,16,power_law_1.01,0.04926080107688904
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,16,16,power_law_1.01,0.05430399775505066
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,16,16,power_law_1.01,0.05441280007362366
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,16,16,power_law_1.01,0.05727999806404114
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,16,16,power_law_1.01,0.06460800170898437
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,16,16,power_law_1.01,0.06991999745368957
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,16,16,power_law_1.01,0.08115839958190918
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,16,16,power_law_1.01,0.08930559754371643
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,16,16,power_law_1.01,0.11304320096969604
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,16,16,power_law_1.01,0.14186880588531495
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,16,16,power_law_1.01,0.18197120428085328
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,16,16,power_law_1.01,0.22252159118652343
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,16,16,power_law_1.01,0.3144383907318115
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,16,16,power_law_1.01,0.4072127819061279
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,1,128,power_law_1.2,0.01839359998703003
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,1,128,power_law_1.2,0.020153599977493285
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,1,128,power_law_1.2,0.01834239959716797
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,1,128,power_law_1.2,0.02014079988002777
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,1,128,power_law_1.2,0.023161600530147552
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,1,128,power_law_1.2,0.03632639944553375
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,1,128,power_law_1.2,0.033030399680137636
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,1,128,power_law_1.2,0.03370879888534546
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,1,128,power_law_1.2,0.03298560082912445
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,1,128,power_law_1.2,0.032767999172210696
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,1,128,power_law_1.2,0.03383040130138397
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,1,128,power_law_1.2,0.033504000306129454
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,1,128,power_law_1.2,0.03437440097332001
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,1,128,power_law_1.2,0.03199360072612763
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,1,128,power_law_1.2,0.032076799869537355
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,1,128,power_law_1.2,0.033958399295806886
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,1,128,power_law_1.2,0.0349375993013382
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,1,128,power_law_1.2,0.03566080033779144
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,1,128,power_law_1.2,0.039027199149131775
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,1,128,power_law_1.2,0.050387197732925416
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,1,128,power_law_1.2,0.05445759892463684
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,1,128,power_law_1.2,0.07188479900360108
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,1,128,power_law_1.2,0.08698880076408386
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,1,128,power_law_1.2,0.1181439995765686
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,1,128,power_law_1.2,0.15765119791030885
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,1,128,power_law_1.2,0.2306879997253418
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,1,128,power_law_1.2,0.276691198348999
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,1,128,power_law_1.01,0.05939840078353882
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,1,128,power_law_1.01,0.07364479899406433
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,1,128,power_law_1.01,0.041875201463699344
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,1,128,power_law_1.01,0.04223999977111816
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,1,128,power_law_1.01,0.04472959935665131
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,1,128,power_law_1.01,0.04622080028057098
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,1,128,power_law_1.01,0.04736000001430511
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,1,128,power_law_1.01,0.04818559885025024
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,1,128,power_law_1.01,0.04842239916324616
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,1,128,power_law_1.01,0.04906240105628967
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,1,128,power_law_1.01,0.05114240050315857
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,1,128,power_law_1.01,0.053887999057769774
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,1,128,power_law_1.01,0.053324800729751584
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,1,128,power_law_1.01,0.055078399181365964
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,1,128,power_law_1.01,0.0597760021686554
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,1,128,power_law_1.01,0.06543359756469727
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,1,128,power_law_1.01,0.06109439730644226
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,1,128,power_law_1.01,0.06954879760742187
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,1,128,power_law_1.01,0.06899200081825256
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,1,128,power_law_1.01,0.08604159951210022
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,1,128,power_law_1.01,0.09354879856109619
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,1,128,power_law_1.01,0.12264959812164307
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,1,128,power_law_1.01,0.13570560216903688
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,1,128,power_law_1.01,0.17775360345840455
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,1,128,power_law_1.01,0.22059519290924073
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,1,128,power_law_1.01,0.2807744026184082
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,1,128,power_law_1.01,0.40129919052124025
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,1,256,power_law_1.2,0.047167998552322385
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,1,256,power_law_1.2,0.048076799511909483
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,1,256,power_law_1.2,0.04751999974250794
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,1,256,power_law_1.2,0.050470399856567386
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,1,256,power_law_1.2,0.04927360117435455
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,1,256,power_law_1.2,0.05136640071868896
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,1,256,power_law_1.2,0.05149440169334411
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,1,256,power_law_1.2,0.052121597528457644
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,1,256,power_law_1.2,0.05231999754905701
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,1,256,power_law_1.2,0.05183359980583191
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,1,256,power_law_1.2,0.0538752019405365
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,1,256,power_law_1.2,0.05370240211486817
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,1,256,power_law_1.2,0.05381760001182556
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,1,256,power_law_1.2,0.05382400155067444
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,1,256,power_law_1.2,0.05799040198326111
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,1,256,power_law_1.2,0.056908798217773435
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,1,256,power_law_1.2,0.06296319961547851
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,1,256,power_law_1.2,0.06734079718589783
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,1,256,power_law_1.2,0.07230079770088196
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,1,256,power_law_1.2,0.08481919765472412
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,1,256,power_law_1.2,0.09641600251197815
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,1,256,power_law_1.2,0.121452796459198
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,1,256,power_law_1.2,0.14711040258407593
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,1,256,power_law_1.2,0.2089792013168335
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,1,256,power_law_1.2,0.26696319580078126
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,1,256,power_law_1.2,0.3646015882492065
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,1,256,power_law_1.2,0.4825024127960205
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,1,256,balanced,0.04790399968624115
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,1,256,balanced,0.048581331968307495
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,1,256,balanced,0.04821333289146423
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,1,256,balanced,0.050250664353370667
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,1,256,balanced,0.052186667919158936
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,1,256,balanced,0.052373334765434265
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,1,256,balanced,0.05223466455936432
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,1,256,balanced,0.053957333167394005
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,1,256,balanced,0.05403199791908264
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,1,256,balanced,0.054048001766204834
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,1,256,balanced,0.0554720014333725
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,1,256,balanced,0.055813332398732506
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,1,256,balanced,0.054117331902186074
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,1,256,balanced,0.05773866673310598
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,1,256,balanced,0.0602400004863739
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,1,256,balanced,0.0602453351020813
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,1,256,balanced,0.06238933404286703
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,1,256,balanced,0.0682773341735204
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,1,256,balanced,0.07005866865317027
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,1,256,balanced,0.07891199986139934
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,1,256,balanced,0.08703466256459554
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,1,256,balanced,0.10116799672444661
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,1,256,balanced,0.11763200163841248
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,1,256,balanced,0.15730667114257812
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,1,256,balanced,0.1902986764907837
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,1,256,balanced,0.2590293288230896
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,1,256,balanced,0.32308799028396606
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,1,2,power_law_1.2,0.10721280574798583
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,1,2,power_law_1.2,0.13075200319290162
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,1,2,power_law_1.2,0.14099839925765992
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,1,2,power_law_1.2,0.17534719705581664
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,1,2,power_law_1.2,0.2041088104248047
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,1,2,power_law_1.2,0.23741440773010253
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,1,2,power_law_1.2,0.3061568021774292
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,1,2,power_law_1.2,0.31705598831176757
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,1,2,power_law_1.2,0.32663679122924805
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,1,2,power_law_1.2,0.34225280284881593
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,1,2,power_law_1.2,0.3453759908676147
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,1,2,power_law_1.2,0.356441593170166
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,1,2,power_law_1.2,0.35489280223846437
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,1,2,power_law_1.2,0.36062719821929934
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,1,2,power_law_1.2,0.37005441188812255
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,1,2,power_law_1.2,0.37985279560089114
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,1,2,power_law_1.2,0.38747520446777345
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,1,2,power_law_1.2,0.4231935977935791
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,1,2,power_law_1.2,0.44775681495666503
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,1,2,power_law_1.2,0.49634561538696287
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,1,2,power_law_1.2,0.5514880180358886
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,1,2,power_law_1.2,0.656390380859375
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,4,1,power_law_1.2,0.0507968008518219
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,4,1,power_law_1.2,0.0564736008644104
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,1,2,power_law_1.2,0.781932783126831
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,4,1,power_law_1.2,0.06330879926681518
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,1,2,power_law_1.2,1.0526528358459473
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,4,1,power_law_1.2,0.08409600257873535
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,4,1,power_law_1.2,0.10354559421539307
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,1,2,power_law_1.2,1.2766783714294434
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,4,1,power_law_1.2,0.1218559980392456
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,4,1,power_law_1.2,0.15488640069961548
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,1,2,power_law_1.2,1.7828479766845704
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,4,1,power_law_1.2,0.16455039978027344
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,4,1,power_law_1.2,0.1709247946739197
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,4,1,power_law_1.2,0.17495039701461793
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,1,2,power_law_1.2,2.5235712051391603
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,4,1,power_law_1.2,0.1843008041381836
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,4,1,power_law_1.2,0.19028480052948
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,4,1,power_law_1.2,0.19548159837722778
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,4,1,power_law_1.2,0.20472960472106932
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,4,1,power_law_1.2,0.21663999557495117
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,4,1,power_law_1.2,0.22700159549713134
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,4,1,power_law_1.2,0.23778560161590576
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,4,1,power_law_1.2,0.27500159740448
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,4,1,power_law_1.2,0.29644160270690917
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,4,1,power_law_1.2,0.372704005241394
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,4,1,power_law_1.2,0.42094078063964846
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,4,1,power_law_1.2,0.5459712028503418
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,4,1,power_law_1.2,0.6473152160644531
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,4,1,power_law_1.2,0.879923152923584
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,4,1,power_law_1.2,1.1028032302856445
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,4,1,power_law_1.2,1.5470463752746582
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,4,1,power_law_1.2,1.987980842590332
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,4,16,balanced,0.04274133344491323
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,4,16,balanced,0.04154133299986521
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,4,16,balanced,0.04357333481311798
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,4,16,balanced,0.04354133208592733
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,4,16,balanced,0.046682665745417275
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,4,16,balanced,0.06061333417892456
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,4,16,balanced,0.06261866788069408
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,4,16,balanced,0.06445333361625671
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,4,16,balanced,0.06412800153096516
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,4,16,balanced,0.06538666784763336
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,4,16,balanced,0.06417599817117055
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,4,16,balanced,0.06549333532651265
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,4,16,balanced,0.06611733138561249
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,4,16,balanced,0.06718400120735168
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,4,16,balanced,0.07061866422494252
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,4,16,balanced,0.07246399919191997
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,4,16,balanced,0.07430399954319
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,4,16,balanced,0.07843199868996938
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,4,16,balanced,0.08078399797280629
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,4,16,balanced,0.09061333537101746
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,4,16,balanced,0.10162132978439331
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,4,16,balanced,0.11949333548545837
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,4,16,balanced,0.13193066914876303
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,4,16,balanced,0.17221333583196005
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,4,16,balanced,0.20429333051045737
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,4,16,balanced,0.2748853365580241
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,4,16,balanced,0.3487306833267212
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,1,32,balanced,0.04610133171081543
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,1,32,balanced,0.04587199787298838
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,1,32,balanced,0.04504533112049103
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,1,32,balanced,0.04786133269468943
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,1,32,balanced,0.05162133276462555
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,1,32,balanced,0.05569066603978475
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,1,32,balanced,0.05657066901524862
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,1,32,balanced,0.05807999769846598
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,1,32,balanced,0.05810666580994924
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,1,32,balanced,0.057301332553227745
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,1,32,balanced,0.057999998331069946
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,1,32,balanced,0.058917333682378135
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,1,32,balanced,0.05805333455403646
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,1,32,balanced,0.06076266864935557
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,1,32,balanced,0.06434133152167003
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,1,32,balanced,0.06628799935181935
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,1,32,balanced,0.06806933383146922
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,1,32,balanced,0.07254933317502339
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,1,32,balanced,0.0749066670735677
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,1,32,balanced,0.08794132868448894
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,1,32,balanced,0.10219200452168782
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,1,32,balanced,0.12572800119717917
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,1,32,balanced,0.14478400349617004
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,1,32,balanced,0.18530132373174033
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,1,32,balanced,0.20853332678476968
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,1,32,balanced,0.28513065973917645
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,1,32,balanced,0.35361599922180176
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,1,8,power_law_1.2,0.052697598934173584
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,1,8,power_law_1.2,0.06209279894828797
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,1,8,power_law_1.2,0.06223359704017639
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,1,8,power_law_1.2,0.07819520235061646
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,1,8,power_law_1.2,0.07916799783706666
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,1,8,power_law_1.2,0.07414399981498718
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,1,8,power_law_1.2,0.09056000113487243
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,1,8,power_law_1.2,0.08972160220146179
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,1,8,power_law_1.2,0.0880511999130249
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,1,8,power_law_1.2,0.08640639781951905
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,1,8,power_law_1.2,0.08967040181159973
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,1,8,power_law_1.2,0.09308159947395325
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,1,8,power_law_1.2,0.09413120150566101
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,1,8,power_law_1.2,0.10096640586853027
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,1,8,power_law_1.2,0.10485119819641113
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,1,8,power_law_1.2,0.11012480258941651
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,1,8,power_law_1.2,0.11473920345306396
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,1,8,power_law_1.2,0.12597119808197021
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,1,8,power_law_1.2,0.13872640132904052
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,1,8,power_law_1.2,0.16028800010681152
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,1,8,power_law_1.2,0.18188159465789794
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,1,8,power_law_1.2,0.22257919311523439
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,1,8,power_law_1.2,0.272652792930603
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,1,8,power_law_1.2,0.3858943939208984
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,1,8,power_law_1.2,0.44615678787231444
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,1,8,power_law_1.2,0.5984127998352051
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,1,8,power_law_1.2,0.7870848178863525
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,1,2,power_law_1.2,0.07649279832839966
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,1,2,power_law_1.2,0.11208959817886352
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,1,2,power_law_1.2,0.1242751955986023
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,1,2,power_law_1.2,0.18502399921417237
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,1,2,power_law_1.2,0.24357759952545166
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,1,2,power_law_1.2,0.3170176029205322
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,1,2,power_law_1.2,0.40613760948181155
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,1,2,power_law_1.2,0.4288640022277832
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,1,2,power_law_1.2,0.43999361991882324
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,1,2,power_law_1.2,0.4441728115081787
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,1,2,power_law_1.2,0.45991039276123047
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,1,2,power_law_1.2,0.4630080223083496
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,1,2,power_law_1.2,0.4540736198425293
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,1,2,power_law_1.2,0.4788479804992676
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,1,2,power_law_1.2,0.5206399917602539
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,1,2,power_law_1.2,0.5345600128173829
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,1,2,power_law_1.2,0.5566592216491699
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,1,2,power_law_1.2,0.6288064002990723
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,1,2,power_law_1.2,0.6192319869995118
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,1,2,power_law_1.2,0.6913407802581787
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,1,2,power_law_1.2,0.72674560546875
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,1,2,power_law_1.2,0.820025634765625
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,1,2,power_law_1.2,0.9522944450378418
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,1,2,power_law_1.2,1.1406335830688477
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,1,2,power_law_1.2,1.2882880210876464
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,1,2,power_law_1.2,1.6374399185180664
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,1,2,power_law_1.2,2.249062347412109
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,1,64,power_law_1.2,0.045510399341583255
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,1,64,power_law_1.2,0.043910399079322815
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,1,64,power_law_1.2,0.044019201397895814
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,1,64,power_law_1.2,0.04750719964504242
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,1,64,power_law_1.2,0.048793599009513855
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,1,64,power_law_1.2,0.049055999517440795
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,1,64,power_law_1.2,0.049644801020622256
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,1,64,power_law_1.2,0.051072001457214355
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,1,64,power_law_1.2,0.054688000679016115
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,1,64,power_law_1.2,0.05500800013542175
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,1,64,power_law_1.2,0.05566080212593079
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,1,64,power_law_1.2,0.05539199709892273
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,1,64,power_law_1.2,0.05475199818611145
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,1,64,power_law_1.2,0.059987199306488034
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,1,64,power_law_1.2,0.06539520025253295
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,1,64,power_law_1.2,0.06453760266304016
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,1,64,power_law_1.2,0.07010560035705567
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,1,64,power_law_1.2,0.07991039752960205
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,1,64,power_law_1.2,0.08847360014915466
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,1,64,power_law_1.2,0.11386239528656006
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,1,64,power_law_1.2,0.1344063997268677
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,1,64,power_law_1.2,0.16754560470581054
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,1,64,power_law_1.2,0.20556159019470216
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,1,64,power_law_1.2,0.30171520709991456
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,1,64,power_law_1.2,0.37068800926208495
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,1,64,power_law_1.2,0.6424255847930909
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,1,64,power_law_1.2,0.789408016204834
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,2,8,power_law_1.01,0.056467199325561525
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,2,8,power_law_1.01,0.0697983980178833
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,2,8,power_law_1.01,0.07739520072937012
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,2,8,power_law_1.01,0.08213760256767273
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,2,8,power_law_1.01,0.09591040015220642
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,2,8,power_law_1.01,0.10911359786987304
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,2,8,power_law_1.01,0.1054144024848938
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,2,8,power_law_1.01,0.10042879581451417
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,2,8,power_law_1.01,0.10248960256576538
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,2,8,power_law_1.01,0.1003775954246521
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,2,8,power_law_1.01,0.10328320264816285
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,2,8,power_law_1.01,0.09778559803962708
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,2,8,power_law_1.01,0.10551680326461792
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,2,8,power_law_1.01,0.10855040550231934
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,2,8,power_law_1.01,0.11473280191421509
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,2,8,power_law_1.01,0.113811194896698
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,2,8,power_law_1.01,0.12104320526123047
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,2,8,power_law_1.01,0.131494402885437
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,2,8,power_law_1.01,0.13427200317382812
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,2,8,power_law_1.01,0.1578495979309082
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,2,8,power_law_1.01,0.17377920150756837
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,2,8,power_law_1.01,0.20670080184936523
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,2,8,power_law_1.01,0.23685119152069092
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,2,8,power_law_1.01,0.3049855947494507
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,2,8,power_law_1.01,0.3724416017532349
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,2,8,power_law_1.01,0.5221248149871827
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,2,8,power_law_1.01,0.6356287956237793
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,4,64,power_law_1.2,0.056032001972198486
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,4,64,power_law_1.2,0.041894400119781496
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,4,64,power_law_1.2,0.04166400134563446
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,4,64,power_law_1.2,0.042483198642730716
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,4,64,power_law_1.2,0.0427264004945755
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,4,64,power_law_1.2,0.044409599900245664
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,4,64,power_law_1.2,0.043961599469184875
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,4,64,power_law_1.2,0.04439679980278015
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,4,64,power_law_1.2,0.04485760033130646
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,4,64,power_law_1.2,0.04556800127029419
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,4,64,power_law_1.2,0.046540799736976626
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,4,64,power_law_1.2,0.04745599925518036
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,4,64,power_law_1.2,0.04805760085582733
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,4,64,power_law_1.2,0.0497408002614975
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,4,64,power_law_1.2,0.05456640124320984
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,4,64,power_law_1.2,0.05495679974555969
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,4,64,power_law_1.2,0.05811200141906738
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,4,64,power_law_1.2,0.06627200245857238
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,4,64,power_law_1.2,0.070278400182724
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,4,64,power_law_1.2,0.08161919713020324
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,4,64,power_law_1.2,0.09103360176086425
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,4,64,power_law_1.2,0.11936000585556031
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,4,64,power_law_1.2,0.149126398563385
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,4,64,power_law_1.2,0.20594561100006104
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,4,64,power_law_1.2,0.2870847940444946
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,4,64,power_law_1.2,0.41212158203125
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,4,64,power_law_1.2,0.48567681312561034
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,1,64,balanced,0.052202666799227394
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,1,64,balanced,0.04665599763393402
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,1,64,balanced,0.043807998299598694
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,1,64,balanced,0.04622933268547058
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,1,64,balanced,0.04608533283074697
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,1,64,balanced,0.04739200075467428
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,1,64,balanced,0.04811733464399973
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,1,64,balanced,0.050293331344922386
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,1,64,balanced,0.04950400193532308
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,1,64,balanced,0.050069332122802734
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,1,64,balanced,0.05182399849096934
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,1,64,balanced,0.05012799799442291
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,1,64,balanced,0.05273066461086273
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,1,64,balanced,0.05277333160241445
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,1,64,balanced,0.056314667065938316
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,1,64,balanced,0.05825066566467285
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,1,64,balanced,0.058186665177345276
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,1,64,balanced,0.06504533191521962
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,1,64,balanced,0.06868266562620799
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,1,64,balanced,0.07654933134714763
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,1,64,balanced,0.0897866686185201
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,1,64,balanced,0.11947733163833618
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,1,64,balanced,0.13766400019327799
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,1,64,balanced,0.18388799826304117
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,1,64,balanced,0.2217280069986979
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,1,64,balanced,0.3015893300374349
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,1,64,balanced,0.3804639975229899
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,2,128,balanced,0.0422026664018631
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,2,128,balanced,0.04190400242805481
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,2,128,balanced,0.041375999649365745
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,2,128,balanced,0.04486933350563049
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,2,128,balanced,0.04632000128428141
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,2,128,balanced,0.0461760014295578
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,2,128,balanced,0.04780800143877665
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,2,128,balanced,0.05017066498597463
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,8,16,power_law_1.01,0.057171201705932616
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,8,16,power_law_1.01,0.057631999254226685
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,8,16,power_law_1.01,0.0493120014667511
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,8,16,power_law_1.01,0.05134720206260681
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,8,16,power_law_1.01,0.05198079943656921
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,8,16,power_law_1.01,0.05246080160140991
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,8,16,power_law_1.01,0.05241600275039673
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,8,16,power_law_1.01,0.053932797908782956
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,8,16,power_law_1.01,0.05422080159187317
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,8,16,power_law_1.01,0.054124802350997925
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,8,16,power_law_1.01,0.05649920105934143
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,8,16,power_law_1.01,0.056435197591781616
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,8,16,power_law_1.01,0.056550401449203494
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,8,16,power_law_1.01,0.05857920050621033
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,8,16,power_law_1.01,0.0627135992050171
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,8,16,power_law_1.01,0.06382079720497132
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,8,16,power_law_1.01,0.0687936007976532
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,8,16,power_law_1.01,0.07863039970397949
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,8,16,power_law_1.01,0.08373759984970093
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,8,16,power_law_1.01,0.09838719964027405
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,8,16,power_law_1.01,0.10779520273208618
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,8,16,power_law_1.01,0.13286399841308594
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,8,16,power_law_1.01,0.16971520185470582
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,8,16,power_law_1.01,0.22224640846252441
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,8,16,power_law_1.01,0.2729727983474731
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,8,16,power_law_1.01,0.39190399646759033
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,8,16,power_law_1.01,0.500767993927002
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,2,128,balanced,0.048250665267308555
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,2,128,balanced,0.050330668687820435
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,2,128,balanced,0.0521066685517629
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,2,128,balanced,0.05249066650867462
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,2,128,balanced,0.0521919975678126
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,2,128,balanced,0.054655998945236206
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,2,128,balanced,0.06222933530807495
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,2,128,balanced,0.06011733412742615
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,2,128,balanced,0.06239999830722809
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,2,128,balanced,0.06830400228500366
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,2,128,balanced,0.0729013333717982
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,2,128,balanced,0.08429333567619324
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,2,128,balanced,0.08998933434486389
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,2,128,balanced,0.11480533083279927
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,2,128,balanced,0.13051733374595642
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,2,128,balanced,0.17012800772984824
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,2,128,balanced,0.21224000056584677
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,2,128,balanced,0.29310399293899536
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,2,128,balanced,0.3739946683247884
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,8,16,balanced,0.03811733424663544
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,8,16,balanced,0.037050666908423104
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,8,16,balanced,0.03965333352486292
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,8,16,balanced,0.03939733405907949
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,8,16,balanced,0.04177066683769226
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,8,16,balanced,0.043749332427978516
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,8,16,balanced,0.04571199913819631
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,8,16,balanced,0.04456533491611481
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,8,16,balanced,0.04457599918047587
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,8,16,balanced,0.048021331429481506
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,8,16,balanced,0.045978665351867676
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,8,16,balanced,0.04981866478919983
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,8,16,balanced,0.048026666045188904
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,8,16,balanced,0.054042667150497437
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,8,16,balanced,0.05625600119431814
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,8,16,balanced,0.056976000467936196
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,8,16,balanced,0.05811200042565664
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,8,16,balanced,0.064410666624705
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,8,16,balanced,0.06654400130112965
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,8,16,balanced,0.07257066667079926
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,8,16,balanced,0.08322133123874664
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,8,16,balanced,0.10698666175206502
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,8,16,balanced,0.12129066387812297
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,8,16,balanced,0.15770133336385092
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,8,16,balanced,0.19550400972366333
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,8,16,balanced,0.26065067450205487
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,8,16,balanced,0.33372799555460614
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,1,8,balanced,0.04219200213750204
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,1,8,balanced,0.04408533374468485
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,1,8,balanced,0.05380799869696299
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,1,8,balanced,0.07644799848397572
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,1,8,balanced,0.110042671362559
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,1,8,balanced,0.17114667097727457
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,1,8,balanced,0.17202667395273843
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,1,8,balanced,0.17273066441218057
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,1,8,balanced,0.1739520033200582
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,1,8,balanced,0.17279465993245444
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,1,8,balanced,0.17469332615534464
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,1,8,balanced,0.1755680044492086
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,1,8,balanced,0.1768640081087748
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,1,8,balanced,0.17684799432754517
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,1,8,balanced,0.18427199125289917
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,1,8,balanced,0.18347734212875366
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,1,8,balanced,0.18626666069030762
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,1,8,balanced,0.19409066438674927
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,1,8,balanced,0.19789334138234457
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,1,8,balanced,0.2119413415590922
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,1,8,balanced,0.22583999236424765
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,1,8,balanced,0.2571573257446289
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,1,8,balanced,0.2753653327624003
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,1,8,balanced,0.3401706616083781
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,1,8,balanced,0.37957334518432617
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,1,8,balanced,0.5138239860534668
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,1,8,balanced,0.6041386524836222
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,2,128,balanced,0.02181333303451538
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,2,128,balanced,0.020165332903464634
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,2,128,balanced,0.01939733326435089
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,2,128,balanced,0.019381333142518997
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,2,128,balanced,0.021317332983016968
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,2,128,balanced,0.023578666150569916
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,2,128,balanced,0.02350933353106181
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,2,128,balanced,0.03152533372243246
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,2,128,balanced,0.03186666717131933
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,2,128,balanced,0.03183999905983607
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,2,128,balanced,0.031514666974544525
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,2,128,balanced,0.03161066770553589
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,2,128,balanced,0.03366933266321818
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,2,128,balanced,0.03173333406448364
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,2,128,balanced,0.034474665919939675
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,2,128,balanced,0.03589866558710734
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,2,128,balanced,0.033573334415753685
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,2,128,balanced,0.03584533433119456
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,2,128,balanced,0.04022933294375738
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,2,128,balanced,0.04435733457406362
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,2,128,balanced,0.05239466826121012
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,2,128,balanced,0.06435733536879222
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,2,128,balanced,0.08079466720422109
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,2,128,balanced,0.1072160005569458
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,2,128,balanced,0.13379200299580893
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,2,128,balanced,0.18535999457041422
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,2,128,balanced,0.23988266785939535
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,2,4,balanced,0.04426133135954539
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,2,4,balanced,0.04776533444722494
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,2,4,balanced,0.059157331784566246
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,2,4,balanced,0.07628266513347626
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,2,4,balanced,0.10977600018183391
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,2,4,balanced,0.17373865842819214
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,2,4,balanced,0.17204799254735312
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,2,4,balanced,0.17369065682093301
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,2,4,balanced,0.17466666301091513
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,2,4,balanced,0.1750026742617289
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,2,4,balanced,0.17459734280904135
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,2,4,balanced,0.1787359913190206
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,2,4,balanced,0.17922133207321167
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,2,4,balanced,0.1803413430849711
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,2,4,balanced,0.18529067436854044
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,2,4,balanced,0.18598934014638266
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,2,4,balanced,0.191103994846344
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,2,4,balanced,0.20003734032313028
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,2,4,balanced,0.20548800627390543
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,2,4,balanced,0.22292800744374594
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,2,4,balanced,0.2403786579767863
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,2,4,balanced,0.28008000055948895
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,2,4,balanced,0.31752532720565796
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,2,4,balanced,0.3944213390350342
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,2,4,balanced,0.45182931423187256
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,2,4,balanced,0.6190186738967896
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,2,4,balanced,0.7440000375111898
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,2,8,power_law_1.01,0.047219198942184445
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,2,8,power_law_1.01,0.057651197910308837
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,2,8,power_law_1.01,0.06195840239524841
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,2,8,power_law_1.01,0.06468480229377746
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,2,8,power_law_1.01,0.07310079932212829
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,2,8,power_law_1.01,0.07242239713668823
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,2,8,power_law_1.01,0.07733759880065919
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,2,8,power_law_1.01,0.07378559708595275
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,2,8,power_law_1.01,0.08191360235214233
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,2,8,power_law_1.01,0.0787392020225525
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,2,8,power_law_1.01,0.08127359747886657
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,2,8,power_law_1.01,0.08141440153121948
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,2,8,power_law_1.01,0.08338559865951538
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,2,8,power_law_1.01,0.08606719970703125
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,2,8,power_law_1.01,0.09285119771957398
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,2,8,power_law_1.01,0.09537919759750366
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,2,8,power_law_1.01,0.10258560180664063
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,2,8,power_law_1.01,0.11458560228347778
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,2,8,power_law_1.01,0.11947519779205322
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,2,8,power_law_1.01,0.13997440338134765
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,2,8,power_law_1.01,0.15502079725265502
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,2,8,power_law_1.01,0.19594240188598633
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,2,8,power_law_1.01,0.23141119480133057
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,2,8,power_law_1.01,0.3054656028747559
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,2,8,power_law_1.01,0.40723838806152346
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,2,8,power_law_1.01,0.5643712043762207
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,2,8,power_law_1.01,0.7672383785247803
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,1,32,power_law_1.2,0.056627202033996585
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,1,32,power_law_1.2,0.05635200142860412
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,1,32,power_law_1.2,0.0543936014175415
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,1,32,power_law_1.2,0.06056960225105286
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,1,32,power_law_1.2,0.05301759839057922
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,1,32,power_law_1.2,0.051846402883529666
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,1,32,power_law_1.2,0.0545087993144989
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,1,32,power_law_1.2,0.05592319965362549
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,1,32,power_law_1.2,0.052153599262237546
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,1,32,power_law_1.2,0.05688959956169128
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,1,32,power_law_1.2,0.06236159801483154
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,1,32,power_law_1.2,0.06432639956474304
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,1,32,power_law_1.2,0.062003201246261595
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,1,32,power_law_1.2,0.06626560091972351
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,1,32,power_law_1.2,0.0697920024394989
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,1,32,power_law_1.2,0.07642239928245545
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,1,32,power_law_1.2,0.08043519854545593
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,1,32,power_law_1.2,0.09402880072593689
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,1,32,power_law_1.2,0.09587839841842652
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,1,32,power_law_1.2,0.11062400341033936
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,1,32,power_law_1.2,0.13125760555267335
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,1,32,power_law_1.2,0.1684224009513855
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,1,32,power_law_1.2,0.1982143998146057
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,1,32,power_law_1.2,0.2652928113937378
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,1,32,power_law_1.2,0.3571840047836304
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,1,32,power_law_1.2,0.45751681327819826
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,1,32,power_law_1.2,0.7091008186340332
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,16,2,balanced,0.045941332976023354
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,16,2,balanced,0.047968000173568726
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,16,2,balanced,0.048112000028292336
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,16,2,balanced,0.053914666175842285
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,16,2,balanced,0.06357866525650024
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,16,2,balanced,0.08122666676839192
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,16,2,balanced,0.08238400022188823
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,16,2,balanced,0.08490133285522461
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,16,2,balanced,0.0851093331972758
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,16,2,balanced,0.08737599849700928
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,16,2,balanced,0.08768000205357869
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,16,2,balanced,0.08990933497746785
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,16,2,balanced,0.09110400080680847
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,16,2,balanced,0.09573333462079366
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,16,2,balanced,0.10083733002344768
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,16,2,balanced,0.10299733281135559
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,16,2,balanced,0.11134399970372517
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,16,2,balanced,0.12558933099110922
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,16,2,balanced,0.1381013294061025
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,16,2,balanced,0.16487466295560202
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,16,2,balanced,0.1953386664390564
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,16,2,balanced,0.24908800919850668
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,16,2,balanced,0.30266666412353516
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,16,2,balanced,0.4464053312937419
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,16,2,balanced,0.5504853328069051
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,16,2,balanced,0.8165280024210612
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,16,2,balanced,1.0682613054911296
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,8,8,power_law_1.01,0.020185600221157073
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,8,8,power_law_1.01,0.01916159987449646
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,8,8,power_law_1.01,0.020019200444221497
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,8,8,power_law_1.01,0.020390400290489198
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,8,8,power_law_1.01,0.020921599864959717
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,8,8,power_law_1.01,0.021529600024223328
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,8,8,power_law_1.01,0.02222079932689667
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,8,8,power_law_1.01,0.02314240038394928
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,8,8,power_law_1.01,0.024031999707221984
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,8,8,power_law_1.01,0.024409599602222443
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,8,8,power_law_1.01,0.024588799476623534
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,8,8,power_law_1.01,0.025945600867271424
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,8,8,power_law_1.01,0.02570880055427551
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,8,8,power_law_1.01,0.02656640112400055
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,8,8,power_law_1.01,0.02791680097579956
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,8,8,power_law_1.01,0.032691198587417605
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,8,8,power_law_1.01,0.035206401348114015
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,8,8,power_law_1.01,0.03849599957466125
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,8,8,power_law_1.01,0.058271998167037965
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,8,8,power_law_1.01,0.06846719980239868
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,8,8,power_law_1.01,0.06385279893875122
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,8,8,power_law_1.01,0.07470719814300537
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,8,8,power_law_1.01,0.0832319974899292
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,8,8,power_law_1.01,0.10392960309982299
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,8,8,power_law_1.01,0.12846720218658447
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,8,8,power_law_1.01,0.17477760314941407
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,8,8,power_law_1.01,0.22159359455108643
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,1,4,balanced,0.05026133358478546
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,1,4,balanced,0.04994666576385498
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,1,4,balanced,0.055770665407180786
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,1,4,balanced,0.07175466914971669
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,1,4,balanced,0.09764267007509868
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,1,4,balanced,0.13804800311724344
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,1,4,balanced,0.13990400234858194
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,1,4,balanced,0.14408533771832785
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,1,4,balanced,0.14261333147684732
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,1,4,balanced,0.14239999651908875
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,1,4,balanced,0.14436266819636026
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,1,4,balanced,0.14637866616249084
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,1,4,balanced,0.14711999893188477
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,1,4,balanced,0.15005333224932352
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,1,4,balanced,0.1558133363723755
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,1,4,balanced,0.15730667114257812
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,1,4,balanced,0.16328000028928122
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,1,4,balanced,0.17383466164271036
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,1,4,balanced,0.1811093290646871
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,1,4,balanced,0.2060533364613851
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,1,4,balanced,0.23093332846959433
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,1,4,balanced,0.2694773276646932
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,1,4,balanced,0.31626667579015094
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,1,4,balanced,0.4323466618855794
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,1,4,balanced,0.5118346611658732
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,1,4,balanced,0.7173333168029785
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,1,4,balanced,0.9052533308664957
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,1,8,power_law_1.2,0.024108800292015075
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,1,8,power_law_1.2,0.03675520122051239
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,1,8,power_law_1.2,0.0358271986246109
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,1,8,power_law_1.2,0.042796799540519716
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,1,8,power_law_1.2,0.048153600096702574
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,1,8,power_law_1.2,0.048902401328086854
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,1,8,power_law_1.2,0.05666559934616089
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,1,8,power_law_1.2,0.05014399886131286
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,1,8,power_law_1.2,0.06663039922714234
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,1,8,power_law_1.2,0.061247998476028444
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,1,8,power_law_1.2,0.061843198537826535
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,1,8,power_law_1.2,0.0902400016784668
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,1,8,power_law_1.2,0.08570240139961242
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,1,8,power_law_1.2,0.07901440262794494
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,1,8,power_law_1.2,0.09871360063552856
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,1,8,power_law_1.2,0.10609920024871826
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,1,8,power_law_1.2,0.10773119926452637
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,1,8,power_law_1.2,0.1295040011405945
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,1,8,power_law_1.2,0.12383359670639038
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,1,8,power_law_1.2,0.12373759746551513
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,1,8,power_law_1.2,0.12726399898529053
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,1,8,power_law_1.2,0.13891199827194214
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,1,8,power_law_1.2,0.1573632001876831
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,1,8,power_law_1.2,0.20452480316162108
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,1,8,power_law_1.2,0.255731201171875
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,1,8,power_law_1.2,0.35484800338745115
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,1,8,power_law_1.2,0.4616896152496338
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,16,1,power_law_1.01,0.04850560128688812
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,16,1,power_law_1.01,0.05129600167274475
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,16,1,power_law_1.01,0.05485439896583557
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,16,1,power_law_1.01,0.07073280215263367
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,16,1,power_law_1.01,0.08580480217933655
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,16,1,power_law_1.01,0.09943680167198181
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,16,1,power_law_1.01,0.11651840209960937
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,16,1,power_law_1.01,0.11979520320892334
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,16,1,power_law_1.01,0.12341760396957398
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,16,1,power_law_1.01,0.12744959592819213
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,16,1,power_law_1.01,0.1313279986381531
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,16,1,power_law_1.01,0.1371840000152588
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,16,1,power_law_1.01,0.13903360366821288
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,16,1,power_law_1.01,0.1423359990119934
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,16,1,power_law_1.01,0.14913920164108277
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,16,1,power_law_1.01,0.1554751992225647
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,16,1,power_law_1.01,0.16867200136184693
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,16,1,power_law_1.01,0.20614399909973144
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,16,1,power_law_1.01,0.22072319984436034
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,16,1,power_law_1.01,0.2746880054473877
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,16,1,power_law_1.01,0.3162879943847656
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,16,1,power_law_1.01,0.4198592185974121
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,16,1,power_law_1.01,0.5165120124816894
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,16,1,power_law_1.01,0.7000319957733154
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,16,1,power_law_1.01,0.889305591583252
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,16,1,power_law_1.01,1.2663871765136718
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,16,1,power_law_1.01,1.6411199569702148
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,2,16,balanced,0.0480320006608963
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,2,16,balanced,0.047983999053637184
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,2,16,balanced,0.04565866788228353
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,2,16,balanced,0.04986133178075155
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,2,16,balanced,0.05390933156013489
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,2,16,balanced,0.058101331194241844
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,2,16,balanced,0.06030400097370148
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,2,16,balanced,0.0601440022389094
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,2,16,balanced,0.05993066728115082
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,2,16,balanced,0.05995733539263407
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,2,16,balanced,0.061946665247281395
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,2,16,balanced,0.06211733321348826
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,2,16,balanced,0.06041066845258077
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,2,16,balanced,0.06436799963315327
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,2,16,balanced,0.0662773350874583
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,2,16,balanced,0.06857066849867503
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,2,16,balanced,0.07120533287525177
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,2,16,balanced,0.07738666733105977
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,2,16,balanced,0.08051733175913493
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,2,16,balanced,0.09417066971460979
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,2,16,balanced,0.10611200332641602
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,2,16,balanced,0.1285866697629293
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,2,16,balanced,0.14702399571736655
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,2,16,balanced,0.19518399238586426
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,2,16,balanced,0.23231999079386392
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,2,16,balanced,0.32039467493693036
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,2,16,balanced,0.40694932142893475
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,8,2,power_law_1.2,0.04410879909992218
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,8,2,power_law_1.2,0.04751999974250794
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,8,2,power_law_1.2,0.050169599056243894
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,8,2,power_law_1.2,0.06407679915428162
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,8,2,power_law_1.2,0.07477759718894958
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,8,2,power_law_1.2,0.08074880242347718
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,8,2,power_law_1.2,0.10149760246276855
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,8,2,power_law_1.2,0.10599039793014527
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,8,2,power_law_1.2,0.10570240020751953
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,8,2,power_law_1.2,0.10791679620742797
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,8,2,power_law_1.2,0.11411839723587036
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,8,2,power_law_1.2,0.11547520160675048
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,8,2,power_law_1.2,0.11957759857177734
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,8,2,power_law_1.2,0.1242751955986023
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,8,2,power_law_1.2,0.13308800458908082
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,8,2,power_law_1.2,0.13687039613723756
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,8,2,power_law_1.2,0.14116480350494384
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,8,2,power_law_1.2,0.16263680458068847
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,8,2,power_law_1.2,0.1742527961730957
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,8,2,power_law_1.2,0.20666239261627198
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,8,2,power_law_1.2,0.2347520112991333
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,8,2,power_law_1.2,0.3054464101791382
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,8,2,power_law_1.2,0.36506879329681396
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,8,2,power_law_1.2,0.4675327777862549
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,8,2,power_law_1.2,0.5863808155059814
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,8,2,power_law_1.2,0.8340543746948242
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,8,2,power_law_1.2,1.0633791923522948
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,2,2,balanced,0.09920533498128255
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,2,2,balanced,0.10356799761454265
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,2,2,balanced,0.11326400438944499
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,2,2,balanced,0.12923199931780496
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,2,2,balanced,0.16235199570655823
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,2,2,balanced,0.2366186579068502
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,2,2,balanced,0.23851199944814047
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,2,2,balanced,0.23962666591008505
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,2,2,balanced,0.23798400163650513
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,2,2,balanced,0.24210133155186972
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,2,2,balanced,0.24245333671569824
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,2,2,balanced,0.24745599428812662
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,2,2,balanced,0.25067732731501263
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,2,2,balanced,0.2516213258107503
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,2,2,balanced,0.25562665859858197
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,2,2,balanced,0.25673067569732666
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,2,2,balanced,0.26212799549102783
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,2,2,balanced,0.2779093384742737
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,2,2,balanced,0.29077865680058795
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,2,2,balanced,0.32038400570551556
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,2,2,balanced,0.3492853244145711
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,2,2,balanced,0.4135253429412842
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,2,2,balanced,0.47809068361918133
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,2,2,balanced,0.6733439763387045
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,2,2,balanced,0.8060373465220133
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,2,2,balanced,1.1479573249816895
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,2,2,balanced,1.5076533953348796
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,8,1,power_law_1.2,0.018335999548435213
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,8,1,power_law_1.2,0.018688000738620758
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,8,1,power_law_1.2,0.021510399878025055
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,8,1,power_law_1.2,0.026214399933815004
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,8,1,power_law_1.2,0.03296639919281006
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,8,1,power_law_1.2,0.04229120016098022
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,8,1,power_law_1.2,0.052275198698043826
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,8,1,power_law_1.2,0.05497599840164184
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,8,1,power_law_1.2,0.055980801582336426
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,8,1,power_law_1.2,0.0595583975315094
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,8,1,power_law_1.2,0.061689597368240354
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,8,1,power_law_1.2,0.06513280272483826
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,8,1,power_law_1.2,0.06842880249023438
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,8,1,power_law_1.2,0.07391999959945679
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,8,1,power_law_1.2,0.07434239983558655
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,8,1,power_law_1.2,0.07979519963264466
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,8,1,power_law_1.2,0.0911616027355194
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,8,1,power_law_1.2,0.08698239922523499
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,8,1,power_law_1.2,0.1002303957939148
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,8,1,power_law_1.2,0.11395200490951538
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,8,1,power_law_1.2,0.1317055940628052
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,8,1,power_law_1.2,0.21772799491882325
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,8,1,power_law_1.2,0.254694390296936
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,8,1,power_law_1.2,0.2350912094116211
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,8,1,power_law_1.2,0.27209599018096925
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,8,1,power_law_1.2,0.35589759349822997
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,8,1,power_law_1.2,0.45444478988647463
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,2,16,balanced,0.03789333254098892
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,2,16,balanced,0.03955200066169103
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,2,16,balanced,0.039690665900707245
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,2,16,balanced,0.0415786678592364
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,2,16,balanced,0.05811200042565664
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,2,16,balanced,0.0745066652695338
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,2,16,balanced,0.07479466497898102
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,2,16,balanced,0.07632000247637431
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,2,16,balanced,0.07680533329645793
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,2,16,balanced,0.07663466533025105
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,2,16,balanced,0.0758133331934611
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,2,16,balanced,0.07841599980990092
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,2,16,balanced,0.07845866680145264
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,2,16,balanced,0.07894399762153625
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,2,16,balanced,0.08301333089669545
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,2,16,balanced,0.08636800448099773
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,2,16,balanced,0.08881066242853801
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,2,16,balanced,0.09480533003807068
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,2,16,balanced,0.09914666414260864
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,2,16,balanced,0.10899733503659566
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,2,16,balanced,0.11795199910799663
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,2,16,balanced,0.13613866766293845
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,2,16,balanced,0.1530239979426066
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,2,16,balanced,0.1902880072593689
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,2,16,balanced,0.2232053279876709
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,2,16,balanced,0.30059200525283813
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,2,16,balanced,0.3816266854604085
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,1,64,power_law_1.01,0.04854399859905243
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,1,64,power_law_1.01,0.04478079974651337
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,1,64,power_law_1.01,0.04467200040817261
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,1,64,power_law_1.01,0.04747520089149475
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,1,64,power_law_1.01,0.046982398629188536
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,1,64,power_law_1.01,0.04805119931697845
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,1,64,power_law_1.01,0.04824320077896118
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,1,64,power_law_1.01,0.04987519979476929
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,1,64,power_law_1.01,0.04963200092315674
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,1,64,power_law_1.01,0.04978559911251068
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,1,64,power_law_1.01,0.050419199466705325
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,1,64,power_law_1.01,0.051583999395370485
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,1,64,power_law_1.01,0.051974397897720334
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,1,64,power_law_1.01,0.05334399938583374
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,1,64,power_law_1.01,0.0574720025062561
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,1,64,power_law_1.01,0.058412802219390866
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,1,64,power_law_1.01,0.06162559986114502
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,1,64,power_law_1.01,0.06740480065345764
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,1,64,power_law_1.01,0.07542399764060974
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,1,64,power_law_1.01,0.08727040290832519
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,1,64,power_law_1.01,0.10006400346755981
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,1,64,power_law_1.01,0.12164479494094849
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,1,64,power_law_1.01,0.14381439685821534
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,1,64,power_law_1.01,0.192467200756073
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,1,64,power_law_1.01,0.23047680854797364
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,1,64,power_law_1.01,0.3272831916809082
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,1,64,power_law_1.01,0.41743998527526854
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,1,128,power_law_1.2,0.0502016007900238
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,1,128,power_law_1.2,0.05053439736366272
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,1,128,power_law_1.2,0.05108479857444763
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,1,128,power_law_1.2,0.05336959958076477
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,1,128,power_law_1.2,0.05249279737472534
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,1,128,power_law_1.2,0.052102398872375486
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,1,128,power_law_1.2,0.05240960121154785
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,1,128,power_law_1.2,0.053548800945281985
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,1,128,power_law_1.2,0.05248000025749207
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,1,128,power_law_1.2,0.053414398431777955
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,1,128,power_law_1.2,0.05369600057601929
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,1,128,power_law_1.2,0.05615360140800476
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,1,128,power_law_1.2,0.05455999970436096
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,1,128,power_law_1.2,0.05514879822731018
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,1,128,power_law_1.2,0.05978879928588867
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,1,128,power_law_1.2,0.060288000106811526
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,1,128,power_law_1.2,0.06570240259170532
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,1,128,power_law_1.2,0.07102720141410827
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,1,128,power_law_1.2,0.0775168001651764
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,1,128,power_law_1.2,0.09242879748344421
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,1,128,power_law_1.2,0.10325759649276733
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,1,128,power_law_1.2,0.1278272032737732
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,1,128,power_law_1.2,0.15763839483261108
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,1,128,power_law_1.2,0.21408638954162598
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,1,128,power_law_1.2,0.262393593788147
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,1,128,power_law_1.2,0.38915200233459474
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,1,128,power_law_1.2,0.5259200096130371
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,1,16,balanced,0.04576533536116282
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,1,16,balanced,0.045909335215886436
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,1,16,balanced,0.04571733375390371
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,1,16,balanced,0.05884266893068949
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,1,16,balanced,0.07640533149242401
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,1,16,balanced,0.11471999684969585
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,1,16,balanced,0.1153706709543864
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,1,16,balanced,0.11152000228563945
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,1,16,balanced,0.112527996301651
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,1,16,balanced,0.113045334815979
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,1,16,balanced,0.11357333262761433
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,1,16,balanced,0.11437867085138957
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,1,16,balanced,0.11584533254305522
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,1,16,balanced,0.11515733599662781
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,1,16,balanced,0.12024533748626709
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,1,16,balanced,0.12050666411717732
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,1,16,balanced,0.12185600399971008
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,1,16,balanced,0.12795733412106833
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,1,16,balanced,0.13173866271972656
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,1,16,balanced,0.14010666807492575
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,1,16,balanced,0.15011733770370483
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,1,16,balanced,0.1743839979171753
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,1,16,balanced,0.1851200064023336
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,1,16,balanced,0.23481067021687826
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,1,16,balanced,0.2677653431892395
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,1,16,balanced,0.35545599460601807
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,1,16,balanced,0.4354453484217326
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,4,2,balanced,0.04770133395989736
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,4,2,balanced,0.04821333289146423
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,4,2,balanced,0.05194133520126343
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,4,2,balanced,0.06426133215427399
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,4,2,balanced,0.0865760048230489
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,4,2,balanced,0.12477866808573405
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,4,2,balanced,0.1249120036760966
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,4,2,balanced,0.12683733304341635
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,4,2,balanced,0.12742933630943298
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,4,2,balanced,0.13020267089207968
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,4,2,balanced,0.1306613286336263
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,4,2,balanced,0.13517866532007852
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,4,2,balanced,0.13635200262069702
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,4,2,balanced,0.13726933797200522
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,4,2,balanced,0.1423786679903666
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,4,2,balanced,0.1467626690864563
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,4,2,balanced,0.15643200278282166
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,4,2,balanced,0.1750239928563436
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,4,2,balanced,0.18621333440144858
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,4,2,balanced,0.21593066056569418
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,4,2,balanced,0.24580800533294678
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,4,2,balanced,0.3225333293279012
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,4,2,balanced,0.37105600039164227
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,4,2,balanced,0.5209173361460367
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,4,2,balanced,0.6386666695276896
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,4,2,balanced,0.9209120273590088
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,4,2,balanced,1.2035040060679119
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,4,2,balanced,0.019786667078733444
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,4,2,balanced,0.021146667500336964
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,4,2,balanced,0.023050665855407715
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,4,2,balanced,0.027509334186712902
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,4,2,balanced,0.04307733476161957
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,4,2,balanced,0.06196799874305725
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,4,2,balanced,0.06154666841030121
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,4,2,balanced,0.06257600088914235
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,4,2,balanced,0.0637600024541219
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,4,2,balanced,0.06628266473611195
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,4,2,balanced,0.0669653316338857
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,4,2,balanced,0.06625600159168243
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,4,2,balanced,0.07125333448251088
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,4,2,balanced,0.08292266726493835
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,4,2,balanced,0.08244266609350841
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,4,2,balanced,0.08207466701666515
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,4,2,balanced,0.08564800024032593
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,4,2,balanced,0.09356266260147095
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,4,2,balanced,0.1009279986222585
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,4,2,balanced,0.11956800023714702
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,4,2,balanced,0.11901332934697469
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,4,2,balanced,0.17287466923395792
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,4,2,balanced,0.22170666853586832
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,4,2,balanced,0.22710400819778442
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,4,2,balanced,0.238154669602712
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,4,2,balanced,0.41707201798756915
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,4,2,balanced,0.4324959913889567
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,2,64,power_law_1.2,0.04984959959983826
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,2,64,power_law_1.2,0.045132800936698914
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,2,64,power_law_1.2,0.04376960098743439
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,2,64,power_law_1.2,0.04283519983291626
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,2,64,power_law_1.2,0.04357759952545166
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,2,64,power_law_1.2,0.04351359903812409
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,2,64,power_law_1.2,0.04509440064430237
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,2,64,power_law_1.2,0.04542079865932465
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,2,64,power_law_1.2,0.04580479860305786
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,2,64,power_law_1.2,0.04631040096282959
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,2,64,power_law_1.2,0.047142401337623596
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,2,8,power_law_1.01,0.020211200416088104
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,2,64,power_law_1.2,0.04842239916324616
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,2,64,power_law_1.2,0.04937599897384644
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,2,8,power_law_1.01,0.024691200256347655
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,2,64,power_law_1.2,0.05187199711799621
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,2,8,power_law_1.01,0.02784000039100647
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,2,64,power_law_1.2,0.05555840134620667
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,2,64,power_law_1.2,0.05783680081367493
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,2,8,power_law_1.01,0.02885119915008545
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,2,64,power_law_1.2,0.06085119843482971
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,2,8,power_law_1.01,0.03563520014286041
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,2,64,power_law_1.2,0.07147520184516906
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,2,64,power_law_1.2,0.07182719707489013
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,2,8,power_law_1.01,0.0332863986492157
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,2,64,power_law_1.2,0.09102720022201538
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,2,8,power_law_1.01,0.03885439932346344
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,2,64,power_law_1.2,0.09887999892234803
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,2,8,power_law_1.01,0.03779839873313904
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,2,64,power_law_1.2,0.1362944006919861
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,2,8,power_law_1.01,0.04588800072669983
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,2,64,power_law_1.2,0.16588799953460692
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,2,8,power_law_1.01,0.0425024002790451
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,2,64,power_law_1.2,0.2259135961532593
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,2,8,power_law_1.01,0.04266240000724793
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,2,64,power_law_1.2,0.28104960918426514
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,2,8,power_law_1.01,0.05632640123367309
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,2,8,power_law_1.01,0.057043200731277464
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,2,64,power_law_1.2,0.41137919425964353
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,2,8,power_law_1.01,0.054502397775650024
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,2,64,power_law_1.2,0.5269696235656738
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,2,8,power_law_1.01,0.06806399822235107
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,2,8,power_law_1.01,0.0710528016090393
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,2,8,power_law_1.01,0.0686016023159027
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,2,8,power_law_1.01,0.08417279720306396
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,2,8,power_law_1.01,0.0815999984741211
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,2,8,power_law_1.01,0.08133760094642639
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,2,8,power_law_1.01,0.08554880023002624
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,2,8,power_law_1.01,0.0949184000492096
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,2,8,power_law_1.01,0.10883200168609619
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,2,8,power_law_1.01,0.1383039951324463
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,2,8,power_law_1.01,0.16381440162658692
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,2,8,power_law_1.01,0.23015038967132567
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,2,8,power_law_1.01,0.28239998817443845
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,4,32,power_law_1.2,0.04737919867038727
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,4,32,power_law_1.2,0.04590719938278198
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,4,32,power_law_1.2,0.045075199007987975
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,4,32,power_law_1.2,0.04245760142803192
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,4,32,power_law_1.2,0.04636160135269165
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,4,32,power_law_1.2,0.046540799736976626
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,4,32,power_law_1.2,0.048179200291633605
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,4,32,power_law_1.2,0.0487744003534317
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,4,32,power_law_1.2,0.0486847996711731
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,4,32,power_law_1.2,0.05061759948730469
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,4,32,power_law_1.2,0.05009920001029968
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,4,32,power_law_1.2,0.05173119902610779
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,4,32,power_law_1.2,0.0529151976108551
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,4,32,power_law_1.2,0.05728639960289002
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,4,32,power_law_1.2,0.061484801769256595
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,4,32,power_law_1.2,0.06239359974861145
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,4,32,power_law_1.2,0.0639680027961731
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,4,32,power_law_1.2,0.06949120163917541
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,4,32,power_law_1.2,0.07422080039978027
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,4,32,power_law_1.2,0.08607360124588012
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,4,32,power_law_1.2,0.09935359954833985
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,4,32,power_law_1.2,0.11749119758605957
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,4,32,power_law_1.2,0.14161280393600464
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,4,32,power_law_1.2,0.19786239862442018
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,4,32,power_law_1.2,0.2358720064163208
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,4,32,power_law_1.2,0.34133760929107665
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,4,32,power_law_1.2,0.44887681007385255
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,16,4,power_law_1.01,0.04366079866886139
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,16,4,power_law_1.01,0.04451839923858643
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,16,4,power_law_1.01,0.045459198951721194
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,16,4,power_law_1.01,0.04766719937324524
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,16,4,power_law_1.01,0.05121920108795166
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,16,4,power_law_1.01,0.0531391978263855
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,16,4,power_law_1.01,0.0576960027217865
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,16,4,power_law_1.01,0.05720319747924805
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,16,4,power_law_1.01,0.05786240100860596
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,16,4,power_law_1.01,0.05875840187072754
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,16,4,power_law_1.01,0.06083199977874756
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,16,4,power_law_1.01,0.06343039870262146
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,16,4,power_law_1.01,0.0636031985282898
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,16,4,power_law_1.01,0.07034239768981934
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,16,4,power_law_1.01,0.07812479734420777
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,16,4,power_law_1.01,0.08250880241394043
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,16,4,power_law_1.01,0.08136320114135742
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,16,4,power_law_1.01,0.09342079758644103
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,16,4,power_law_1.01,0.10380159616470337
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,16,4,power_law_1.01,0.13365119695663452
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,16,4,power_law_1.01,0.15575040578842164
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,16,4,power_law_1.01,0.2006848096847534
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,16,4,power_law_1.01,0.23865599632263185
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,16,4,power_law_1.01,0.33680000305175783
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,16,4,power_law_1.01,0.43873281478881837
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,16,4,power_law_1.01,0.621017599105835
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,16,4,power_law_1.01,0.7842304229736328
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,4,2,power_law_1.2,0.045542401075363156
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,4,2,power_law_1.2,0.05085440278053284
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,4,2,power_law_1.2,0.054118400812149046
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,4,2,power_law_1.2,0.06897280216217042
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,4,2,power_law_1.2,0.07676799893379212
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,4,2,power_law_1.2,0.08952320218086243
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,4,2,power_law_1.2,0.10725760459899902
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,4,2,power_law_1.2,0.1076159954071045
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,4,2,power_law_1.2,0.1126207947731018
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,4,2,power_law_1.2,0.11872639656066894
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,4,2,power_law_1.2,0.11574399471282959
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,4,2,power_law_1.2,0.1263808012008667
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,4,2,power_law_1.2,0.1267840027809143
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,4,2,power_law_1.2,0.132697594165802
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,4,2,power_law_1.2,0.13976320028305053
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,4,2,power_law_1.2,0.14487680196762084
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,4,2,power_law_1.2,0.15511679649353027
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,4,2,power_law_1.2,0.17378560304641724
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,4,2,power_law_1.2,0.1889024019241333
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,4,2,power_law_1.2,0.23276159763336182
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,4,2,power_law_1.2,0.2690432071685791
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,4,2,power_law_1.2,0.34590721130371094
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,4,2,power_law_1.2,0.4199488162994385
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,4,2,power_law_1.2,0.555840015411377
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,4,2,power_law_1.2,0.6922815799713135
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,4,2,power_law_1.2,0.9630975723266602
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,4,2,power_law_1.2,1.3397184371948243
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,4,8,power_law_1.2,0.04395520091056824
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,4,8,power_law_1.2,0.04880639910697937
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,4,8,power_law_1.2,0.046316799521446225
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,4,8,power_law_1.2,0.05601279735565186
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,4,8,power_law_1.2,0.059462398290634155
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,4,8,power_law_1.2,0.05939840078353882
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,4,8,power_law_1.2,0.05991680026054382
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,4,8,power_law_1.2,0.057843202352523805
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,4,8,power_law_1.2,0.05661439895629883
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,4,8,power_law_1.2,0.056857597827911374
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,4,8,power_law_1.2,0.06266239881515503
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,4,8,power_law_1.2,0.06641280055046081
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,4,8,power_law_1.2,0.0679423987865448
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,4,8,power_law_1.2,0.07116159796714783
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,4,8,power_law_1.2,0.07758079767227173
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,4,8,power_law_1.2,0.0806335985660553
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,4,8,power_law_1.2,0.08520320057868958
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,4,8,power_law_1.2,0.09664639830589294
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,4,8,power_law_1.2,0.10599679946899414
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,4,8,power_law_1.2,0.12817920446395875
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,4,8,power_law_1.2,0.14754559993743896
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,4,8,power_law_1.2,0.1786944031715393
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,4,8,power_law_1.2,0.22259199619293213
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,4,8,power_law_1.2,0.31845118999481203
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,4,8,power_law_1.2,0.4008063793182373
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,4,8,power_law_1.2,0.5487936019897461
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,4,8,power_law_1.2,0.7454847812652587
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,1,128,power_law_1.2,0.052243202924728394
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,1,128,power_law_1.2,0.048281601071357726
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,1,128,power_law_1.2,0.05156480073928833
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,1,128,power_law_1.2,0.04894720017910004
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,1,128,power_law_1.2,0.050995200872421265
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,1,128,power_law_1.2,0.0520576000213623
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,1,128,power_law_1.2,0.051558399200439455
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,1,128,power_law_1.2,0.05319679975509643
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,1,128,power_law_1.2,0.05255680084228516
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,1,128,power_law_1.2,0.0546239972114563
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,1,128,power_law_1.2,0.05319679975509643
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,1,128,power_law_1.2,0.05427200198173523
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,1,128,power_law_1.2,0.05594239830970764
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,1,128,power_law_1.2,0.05614079833030701
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,1,128,power_law_1.2,0.059084802865982056
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,1,128,power_law_1.2,0.05985919833183288
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,1,128,power_law_1.2,0.06382719874382019
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,1,128,power_law_1.2,0.07109119892120361
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,1,128,power_law_1.2,0.07646719813346863
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,1,128,power_law_1.2,0.09021440148353577
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,1,128,power_law_1.2,0.10711679458618165
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,1,128,power_law_1.2,0.14821120500564575
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,1,128,power_law_1.2,0.1770624041557312
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,1,128,power_law_1.2,0.2382591962814331
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,1,128,power_law_1.2,0.2995007991790771
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,1,128,power_law_1.2,0.44768638610839845
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,1,128,power_law_1.2,0.5705855846405029
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,1,32,power_law_1.2,0.07209600210189819
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,1,32,power_law_1.2,0.06944000124931335
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,1,32,power_law_1.2,0.07050880193710327
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,1,32,power_law_1.2,0.07589120268821717
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,1,32,power_law_1.2,0.06709759831428527
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,1,32,power_law_1.2,0.06470400094985962
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,1,32,power_law_1.2,0.07742720246315002
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,1,32,power_law_1.2,0.07184640169143677
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,1,32,power_law_1.2,0.0720192015171051
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,1,32,power_law_1.2,0.07401599884033203
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,1,32,power_law_1.2,0.07651200294494628
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,1,32,power_law_1.2,0.08113279938697815
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,1,32,power_law_1.2,0.07992960214614868
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,1,32,power_law_1.2,0.08915839791297912
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,1,32,power_law_1.2,0.09029759764671326
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,1,32,power_law_1.2,0.09630079865455628
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,1,32,power_law_1.2,0.09475200176239014
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,1,32,power_law_1.2,0.10981119871139526
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,1,32,power_law_1.2,0.10805759429931641
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,1,32,power_law_1.2,0.13144960403442382
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,1,32,power_law_1.2,0.1348736047744751
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,1,32,power_law_1.2,0.16934399604797362
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,1,32,power_law_1.2,0.1888383984565735
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,1,1,power_law_1.01,0.10710400342941284
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,1,32,power_law_1.2,0.2451200008392334
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,1,1,power_law_1.01,0.14043519496917725
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,1,32,power_law_1.2,0.3529855966567993
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,1,1,power_law_1.01,0.19071359634399415
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,1,32,power_law_1.2,0.4919616222381592
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,1,1,power_law_1.01,0.27324159145355226
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,1,32,power_law_1.2,0.6111616134643555
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,1,1,power_law_1.01,0.34351999759674073
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,1,1,power_law_1.01,0.39292159080505373
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,1,1,power_law_1.01,0.509830379486084
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,1,1,power_law_1.01,0.5442495822906495
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,1,1,power_law_1.01,0.5478208065032959
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,1,1,power_law_1.01,0.554911994934082
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,1,1,power_law_1.01,0.5665408134460449
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,1,1,power_law_1.01,0.5884736061096192
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,1,1,power_law_1.01,0.5873407840728759
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,1,1,power_law_1.01,0.6167168140411377
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,1,1,power_law_1.01,0.6204415798187256
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,1,1,power_law_1.01,0.6338240146636963
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,1,1,power_law_1.01,0.6382656097412109
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,1,1,power_law_1.01,0.7038591861724853
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,1,1,power_law_1.01,0.7520703792572021
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,1,1,power_law_1.01,0.8498047828674317
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,1,1,power_law_1.01,0.9633919715881347
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,1,1,power_law_1.01,1.1611712455749512
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,1,1,power_law_1.01,1.338700771331787
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,1,1,power_law_1.01,1.7533119201660157
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,1,1,power_law_1.01,2.1176000595092774
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,1,1,power_law_1.01,2.891097640991211
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,1,1,power_law_1.01,3.6809150695800783
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,8,4,power_law_1.01,0.018348799645900728
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,8,4,power_law_1.01,0.018374399840831758
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,8,4,power_law_1.01,0.01929599940776825
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,8,4,power_law_1.01,0.021491199731826782
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,8,4,power_law_1.01,0.02279040068387985
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,8,4,power_law_1.01,0.0244159996509552
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,8,4,power_law_1.01,0.0268095999956131
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,8,4,power_law_1.01,0.02707839906215668
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,8,4,power_law_1.01,0.03129599988460541
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,8,4,power_law_1.01,0.03141759932041168
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,8,4,power_law_1.01,0.032102400064468385
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,8,4,power_law_1.01,0.03472639918327332
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,8,4,power_law_1.01,0.03359360098838806
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,8,4,power_law_1.01,0.034745600819587705
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,8,4,power_law_1.01,0.046028798818588255
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,8,4,power_law_1.01,0.04320639967918396
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,8,4,power_law_1.01,0.04534400105476379
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,8,4,power_law_1.01,0.06301440000534057
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,8,4,power_law_1.01,0.07071359753608704
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,8,4,power_law_1.01,0.08234239816665649
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,8,4,power_law_1.01,0.08117759823799134
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,8,4,power_law_1.01,0.09319679737091065
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,8,4,power_law_1.01,0.10758399963378906
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,8,4,power_law_1.01,0.1279360055923462
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,8,4,power_law_1.01,0.15464320182800292
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,8,4,power_law_1.01,0.20353920459747316
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,8,4,power_law_1.01,0.266214394569397
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,1,2,balanced,0.11554666360219319
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,1,2,balanced,0.12041067083676656
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,1,2,balanced,0.1341759959856669
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,1,2,balanced,0.1687893271446228
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,1,2,balanced,0.23855467637379965
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,1,2,balanced,0.37993065516153973
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,1,2,balanced,0.38039998213450116
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,1,2,balanced,0.3834826548894246
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,1,2,balanced,0.3834826548894246
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,1,2,balanced,0.38646399974823
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,1,2,balanced,0.3874826828638713
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,1,2,balanced,0.3922079801559448
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,1,2,balanced,0.39373334248860675
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,1,2,balanced,0.3975253502527873
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,1,2,balanced,0.40085868040720624
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,1,2,balanced,0.4047360022862752
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,1,2,balanced,0.4092373450597127
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,1,2,balanced,0.42563732465108234
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,1,2,balanced,0.4389919837315877
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,1,2,balanced,0.4699999888737996
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,1,2,balanced,0.5002719958623251
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,1,2,balanced,0.563701351483663
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,1,2,balanced,0.6338933308919271
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,1,2,balanced,0.8841173648834229
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,1,2,balanced,1.0245440006256104
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,1,2,balanced,1.4430774052937825
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,1,2,balanced,1.897546609242757
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,4,16,power_law_1.01,0.04570879936218262
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,4,16,power_law_1.01,0.06170880198478699
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,4,16,power_law_1.01,0.04686720073223114
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,4,16,power_law_1.01,0.0486847996711731
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,4,16,power_law_1.01,0.04951040148735046
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,4,16,power_law_1.01,0.04925439953804016
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,4,16,power_law_1.01,0.049830400943756105
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,4,16,power_law_1.01,0.0502016007900238
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,4,16,power_law_1.01,0.05101439952850342
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,4,16,power_law_1.01,0.05107839703559876
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,4,16,power_law_1.01,0.05200639963150024
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,4,16,power_law_1.01,0.053401601314544675
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,4,16,power_law_1.01,0.05422080159187317
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,4,16,power_law_1.01,0.058284801244735715
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,4,16,power_law_1.01,0.06343039870262146
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,4,16,power_law_1.01,0.06663680076599121
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,4,16,power_law_1.01,0.06561920046806335
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,4,16,power_law_1.01,0.07393919825553893
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,4,16,power_law_1.01,0.0827072024345398
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,4,16,power_law_1.01,0.09927679896354676
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,4,16,power_law_1.01,0.11139839887619019
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,4,16,power_law_1.01,0.14289920330047606
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,4,16,power_law_1.01,0.17853440046310426
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,4,16,power_law_1.01,0.24090878963470458
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,4,16,power_law_1.01,0.2883584022521973
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,4,16,power_law_1.01,0.41185917854309084
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,4,16,power_law_1.01,0.5313407897949218
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,8,32,balanced,0.04200000067551931
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,8,32,balanced,0.04199466605981191
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,8,32,balanced,0.041562666495641075
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,8,32,balanced,0.044165333112080894
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,8,32,balanced,0.04801600178082784
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,8,32,balanced,0.049733335773150124
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,8,32,balanced,0.05040533343950907
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,8,32,balanced,0.05189333359400431
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,8,32,balanced,0.051962668697039284
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,8,32,balanced,0.052069331208864846
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,8,32,balanced,0.05375466744105021
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,8,32,balanced,0.053914666175842285
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,8,32,balanced,0.05208533505598704
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,8,32,balanced,0.05533866584300995
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,8,32,balanced,0.0591839998960495
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,8,32,balanced,0.060559997955958046
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,8,32,balanced,0.0639626681804657
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,8,32,balanced,0.06821866830190022
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,8,32,balanced,0.07019733389218648
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,8,32,balanced,0.0800906668106715
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,8,32,balanced,0.08922132849693298
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,8,32,balanced,0.10355200370152791
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,8,32,balanced,0.11986666917800903
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,8,32,balanced,0.16642666856447855
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,8,32,balanced,0.19341866175333658
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,8,32,balanced,0.26418666044871014
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,8,32,balanced,0.33513601620992023
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,4,64,power_law_1.01,0.05311999917030334
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,4,64,power_law_1.01,0.043699198961257936
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,4,64,power_law_1.01,0.043942400813102724
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,4,64,power_law_1.01,0.04647679924964905
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,4,64,power_law_1.01,0.046265599131584165
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,4,64,power_law_1.01,0.047270399332046506
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,4,64,power_law_1.01,0.047731199860572816
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,4,64,power_law_1.01,0.04904319941997528
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,4,64,power_law_1.01,0.04894079864025116
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,4,64,power_law_1.01,0.0483711987733841
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,4,64,power_law_1.01,0.049465599656105044
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,4,64,power_law_1.01,0.050860798358917235
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,4,64,power_law_1.01,0.05030400156974792
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,4,64,power_law_1.01,0.051769602298736575
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,4,64,power_law_1.01,0.056620800495147706
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,4,64,power_law_1.01,0.057171201705932616
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,4,64,power_law_1.01,0.06043519973754883
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,4,64,power_law_1.01,0.06554880142211914
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,4,64,power_law_1.01,0.07067520022392274
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,4,64,power_law_1.01,0.08053119778633118
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,4,64,power_law_1.01,0.09032959938049316
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,4,64,power_law_1.01,0.10710400342941284
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,4,64,power_law_1.01,0.1283455967903137
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,4,64,power_law_1.01,0.17982079982757568
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,4,64,power_law_1.01,0.21140480041503906
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,4,64,power_law_1.01,0.30154240131378174
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,4,64,power_law_1.01,0.38064000606536863
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,4,32,power_law_1.2,0.04552319943904877
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,4,32,power_law_1.2,0.04430719912052154
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,4,32,power_law_1.2,0.04310399889945984
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,4,32,power_law_1.2,0.0462336003780365
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,4,32,power_law_1.2,0.047775998711586
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,4,32,power_law_1.2,0.04750719964504242
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,4,32,power_law_1.2,0.04798080027103424
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,4,32,power_law_1.2,0.04965119957923889
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,4,32,power_law_1.2,0.04967679977416992
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,4,32,power_law_1.2,0.05012480020523071
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,4,32,power_law_1.2,0.0494592010974884
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,4,32,power_law_1.2,0.05251200199127197
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,4,32,power_law_1.2,0.052070397138595584
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,4,32,power_law_1.2,0.053011202812194826
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,4,32,power_law_1.2,0.05809280276298523
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,4,32,power_law_1.2,0.05974400043487549
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,4,32,power_law_1.2,0.06339840292930603
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,4,32,power_law_1.2,0.069760000705719
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,4,32,power_law_1.2,0.08152959942817688
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,4,32,power_law_1.2,0.09969279766082764
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,4,32,power_law_1.2,0.11642240285873413
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,4,32,power_law_1.2,0.16032639741897584
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,4,32,power_law_1.2,0.19598720073699952
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,4,32,power_law_1.2,0.2641279935836792
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,4,32,power_law_1.2,0.3185152053833008
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,4,32,power_law_1.2,0.5063295841217041
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,2,2,balanced,0.04785066843032837
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,2,2,balanced,0.05634133517742157
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,2,2,balanced,0.07625600198904674
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,2,2,balanced,0.1090613305568695
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,4,32,power_law_1.2,0.6940991878509521
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,2,2,balanced,0.17243733008702597
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,2,2,balanced,0.29417065779368085
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,2,2,balanced,0.29577066500981647
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,2,2,balanced,0.29688000679016113
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,2,2,balanced,0.2977973421414693
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,2,2,balanced,0.2969759901364644
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,2,2,balanced,0.30060799916585285
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,2,2,balanced,0.30400000015894574
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,2,2,balanced,0.30424533287684125
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,2,2,balanced,0.30762133995691937
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,2,2,balanced,0.31435734033584595
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,2,2,balanced,0.3153173327445984
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,2,2,balanced,0.3253013292948405
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,2,2,balanced,0.3382879892985026
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,2,2,balanced,0.34832533200581867
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,2,2,balanced,0.37699198722839355
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,2,2,balanced,0.41200534502665204
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,2,2,balanced,0.46546133359273273
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,2,2,balanced,0.5134079853693644
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,2,2,balanced,0.6445600191752116
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,2,2,balanced,0.7269066969553629
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,2,2,balanced,1.018778642018636
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,2,2,balanced,1.218127965927124
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,1,2,balanced,0.0706879993279775
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,1,2,balanced,0.08665600419044495
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,1,2,balanced,0.11144000291824341
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,1,2,balanced,0.1604426701863607
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,1,2,balanced,0.248906672000885
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,1,2,balanced,0.4026399850845337
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,1,2,balanced,0.39296531677246094
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,1,2,balanced,0.3731946547826131
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,1,2,balanced,0.3691893418629964
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,1,2,balanced,0.3557120164235433
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,1,2,balanced,0.36659733454386395
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,1,2,balanced,0.3565760056177775
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,1,2,balanced,0.35903998215993244
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,1,2,balanced,0.3756800095240275
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,1,2,balanced,0.3662453492482503
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,1,2,balanced,0.3593653440475464
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,1,2,balanced,0.4081813494364421
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,1,2,balanced,0.3877919912338257
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,1,2,balanced,0.4188266595204671
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,1,2,balanced,0.41874667008717853
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,1,2,balanced,0.4541706641515096
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,1,2,balanced,0.5191200176874796
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,1,2,balanced,0.5895146528879801
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,1,2,balanced,0.9281333287556967
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,1,2,balanced,1.0448373158772786
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,1,2,balanced,1.5221120516459148
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,1,2,balanced,1.8825866381327312
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,2,64,power_law_1.2,0.058156800270080564
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,2,64,power_law_1.2,0.04142079949378967
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,2,64,power_law_1.2,0.04300160109996796
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,2,64,power_law_1.2,0.04673919975757599
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,2,64,power_law_1.2,0.046649599075317384
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,2,64,power_law_1.2,0.04657920002937317
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,2,64,power_law_1.2,0.048281601071357726
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,2,64,power_law_1.2,0.04785920083522797
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,2,64,power_law_1.2,0.04787839949131012
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,2,64,power_law_1.2,0.04886400103569031
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,2,64,power_law_1.2,0.05278720259666443
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,2,64,power_law_1.2,0.053830397129058835
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,2,64,power_law_1.2,0.05707520246505737
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,2,64,power_law_1.2,0.056460797786712646
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,2,64,power_law_1.2,0.0615231990814209
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,2,64,power_law_1.2,0.06378239989280701
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,2,64,power_law_1.2,0.06219519972801209
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,2,64,power_law_1.2,0.07191680073738098
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,2,64,power_law_1.2,0.07420799732208253
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,2,64,power_law_1.2,0.09115520119667053
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,2,64,power_law_1.2,0.10007679462432861
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,2,64,power_law_1.2,0.12246400117874146
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,2,64,power_law_1.2,0.14715520143508912
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,2,64,power_law_1.2,0.2054271936416626
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,2,64,power_law_1.2,0.24204800128936768
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,2,64,power_law_1.2,0.3647680044174194
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,2,64,power_law_1.2,0.43496317863464357
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,4,16,balanced,0.0402453343073527
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,4,16,balanced,0.04164266586303711
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,4,16,balanced,0.04145599901676178
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,4,16,balanced,0.04287999868392944
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,4,16,balanced,0.04784533381462097
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,4,16,balanced,0.04976533353328705
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,4,16,balanced,0.05171733101209005
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,4,16,balanced,0.052469333012898765
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,4,16,balanced,0.053157334526379905
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,4,16,balanced,0.052655999859174095
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,4,16,balanced,0.05400000015894572
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,4,16,balanced,0.05389333268006643
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,4,16,balanced,0.0537120004494985
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,4,16,balanced,0.05593599875768026
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,4,16,balanced,0.05820799867312113
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,4,16,balanced,0.060005332032839455
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,4,16,balanced,0.0639519989490509
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,4,16,balanced,0.06990933418273926
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,4,16,balanced,0.07229333122571309
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,4,16,balanced,0.08417066931724548
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,4,16,balanced,0.09945066769917806
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,4,16,balanced,0.12982933719952902
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,4,16,balanced,0.1597653329372406
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,4,16,balanced,0.21247466405232748
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,4,16,balanced,0.25171200434366864
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,4,16,balanced,0.3484799861907959
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,4,16,balanced,0.44624535242716473
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,16,8,power_law_1.01,0.04078719913959503
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,16,8,power_law_1.01,0.044736000895500186
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,16,8,power_law_1.01,0.04322560131549835
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,16,8,power_law_1.01,0.04632959961891174
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,16,8,power_law_1.01,0.04874880015850067
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,16,8,power_law_1.01,0.04618239998817444
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,16,8,power_law_1.01,0.04723199903964996
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,16,8,power_law_1.01,0.04919039905071258
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,16,8,power_law_1.01,0.04746879935264588
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,16,8,power_law_1.01,0.04888960123062134
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,16,8,power_law_1.01,0.05003520250320435
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,16,8,power_law_1.01,0.05052800178527832
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,16,8,power_law_1.01,0.050246399641036985
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,16,8,power_law_1.01,0.052665597200393675
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,16,8,power_law_1.01,0.05780479907989502
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,16,8,power_law_1.01,0.0604095995426178
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,16,8,power_law_1.01,0.0665727972984314
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,16,8,power_law_1.01,0.07571200132369996
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,16,8,power_law_1.01,0.08028159737586975
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,16,8,power_law_1.01,0.09633920192718506
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,16,8,power_law_1.01,0.11215360164642334
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,16,8,power_law_1.01,0.1372928023338318
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,16,8,power_law_1.01,0.1609536051750183
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,16,8,power_law_1.01,0.22053120136260987
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,16,8,power_law_1.01,0.28098559379577637
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,16,8,power_law_1.01,0.38601601123809814
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,16,8,power_law_1.01,0.47716479301452636
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,2,32,power_law_1.2,0.04805119931697845
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,2,32,power_law_1.2,0.04671359956264496
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,2,32,power_law_1.2,0.046489599347114566
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,2,32,power_law_1.2,0.049446401000022885
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,2,32,power_law_1.2,0.04989440143108368
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,2,32,power_law_1.2,0.04992640018463135
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,2,32,power_law_1.2,0.05129600167274475
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,2,32,power_law_1.2,0.05203199982643127
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,2,32,power_law_1.2,0.052928000688552856
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,2,32,power_law_1.2,0.052883201837539674
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,2,32,power_law_1.2,0.05343359708786011
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,2,32,power_law_1.2,0.05766400098800659
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,2,32,power_law_1.2,0.0593280017375946
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,2,32,power_law_1.2,0.0597055971622467
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,2,32,power_law_1.2,0.061977601051330565
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,2,32,power_law_1.2,0.06290559768676758
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,2,32,power_law_1.2,0.06999679803848266
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,2,32,power_law_1.2,0.08085119724273682
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,2,32,power_law_1.2,0.09007359743118286
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,2,32,power_law_1.2,0.10943360328674316
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,2,32,power_law_1.2,0.12613120079040527
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,2,32,power_law_1.2,0.16902400255203248
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,2,32,power_law_1.2,0.21773440837860109
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,2,32,power_law_1.2,0.2922303915023804
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,2,32,power_law_1.2,0.38662400245666506
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,2,32,power_law_1.2,0.5868095874786377
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,2,32,power_law_1.2,0.7998976230621337
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,2,8,power_law_1.2,0.04688000082969666
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,2,8,power_law_1.2,0.05542399883270264
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,2,8,power_law_1.2,0.05583999752998352
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,2,8,power_law_1.2,0.07009919881820678
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,2,8,power_law_1.2,0.07166720032691956
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,2,8,power_law_1.2,0.07233279943466187
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,2,8,power_law_1.2,0.07789440155029297
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,2,8,power_law_1.2,0.0735040009021759
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,2,8,power_law_1.2,0.07850239872932434
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,2,8,power_law_1.2,0.07541120052337646
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,2,8,power_law_1.2,0.0800320029258728
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,2,8,power_law_1.2,0.08399360179901123
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,2,8,power_law_1.2,0.08488960266113281
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,2,8,power_law_1.2,0.08678399920463561
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,2,8,power_law_1.2,0.09505280256271362
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,2,8,power_law_1.2,0.09866880178451538
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,2,8,power_law_1.2,0.10026880502700805
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,2,8,power_law_1.2,0.11845760345458985
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,2,8,power_law_1.2,0.12191359996795655
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,2,8,power_law_1.2,0.1453376054763794
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,2,8,power_law_1.2,0.16592639684677124
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,2,8,power_law_1.2,0.20061440467834474
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,2,8,power_law_1.2,0.24901120662689208
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,2,8,power_law_1.2,0.35598719120025635
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,2,8,power_law_1.2,0.4245120048522949
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,2,8,power_law_1.2,0.6082560062408447
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,2,8,power_law_1.2,0.8092032432556152
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,4,32,balanced,0.04409066836039225
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,4,32,balanced,0.04207466542720795
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,4,32,balanced,0.04208533465862274
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,4,32,balanced,0.044106667240460716
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,4,32,balanced,0.048250665267308555
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,4,32,balanced,0.04982399940490723
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,4,32,balanced,0.050570666790008545
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,4,32,balanced,0.05188799897829691
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,4,32,balanced,0.052111998200416565
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,4,32,balanced,0.051882664362589516
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,4,32,balanced,0.05242133140563965
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,4,32,balanced,0.053685332338015236
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,4,32,balanced,0.052015999952952065
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,4,32,balanced,0.05473599831263224
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,4,32,balanced,0.058330665032068886
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,4,32,balanced,0.06001066664854685
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,4,32,balanced,0.06403199831644694
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,4,32,balanced,0.06677333513895671
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,4,32,balanced,0.07015466690063477
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,4,32,balanced,0.08046933511892955
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,4,32,balanced,0.08704533179601033
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,4,32,balanced,0.10312533378601074
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,4,32,balanced,0.12001599868138631
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,4,32,balanced,0.16150400042533875
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,4,32,balanced,0.1950613260269165
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,4,32,balanced,0.2685973246892293
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,4,32,balanced,0.3397173484166463
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,2,4,power_law_1.2,0.057171201705932616
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,2,4,power_law_1.2,0.07317759990692138
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,2,4,power_law_1.2,0.07522559762001038
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,2,4,power_law_1.2,0.09976959824562073
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,2,4,power_law_1.2,0.11118079423904419
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,2,4,power_law_1.2,0.12038400173187255
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,2,4,power_law_1.2,0.1532096028327942
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,2,4,power_law_1.2,0.14570239782333375
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,2,4,power_law_1.2,0.15145599842071533
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,2,4,power_law_1.2,0.14880000352859496
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,2,4,power_law_1.2,0.15569280385971068
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,2,4,power_law_1.2,0.15812480449676514
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,2,4,power_law_1.2,0.16268160343170165
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,2,4,power_law_1.2,0.1640895962715149
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,2,4,power_law_1.2,0.1723456025123596
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,2,4,power_law_1.2,0.17619199752807618
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,2,4,power_law_1.2,0.18350080251693726
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,2,4,power_law_1.2,0.21313281059265138
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,2,4,power_law_1.2,0.2115839958190918
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,2,4,power_law_1.2,0.2545727968215942
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,2,4,power_law_1.2,0.26723198890686034
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,2,4,power_law_1.2,0.34427518844604493
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,2,4,power_law_1.2,0.36718719005584716
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,2,4,power_law_1.2,0.4767744064331055
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,2,4,power_law_1.2,0.5754687786102295
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,2,4,power_law_1.2,0.7824128150939942
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,2,4,power_law_1.2,1.0051648139953613
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,1,128,balanced,0.05442133545875549
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,1,128,balanced,0.06617600222428639
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,1,128,balanced,0.042165334026018776
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,1,128,balanced,0.04571199913819631
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,1,128,balanced,0.045696000258127846
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,1,128,balanced,0.04970666766166687
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,1,128,balanced,0.047653332352638245
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,1,128,balanced,0.04979733129342397
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,1,128,balanced,0.0499893327554067
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,1,128,balanced,0.05005866785844167
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,1,128,balanced,0.04965866605440775
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,1,128,balanced,0.049973333875338234
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,1,128,balanced,0.05202133456865946
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,1,128,balanced,0.05212800204753876
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,1,128,balanced,0.05585599939028422
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,1,128,balanced,0.05640000104904175
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,1,128,balanced,0.05606399973233541
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,1,128,balanced,0.06005866825580597
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,1,128,balanced,0.06203199923038483
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,1,128,balanced,0.06817600131034851
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,1,128,balanced,0.07439466814200084
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,1,128,balanced,0.08892266949017842
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,1,128,balanced,0.10273067156473796
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,1,128,balanced,0.136245330174764
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,1,128,balanced,0.16235733032226562
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,1,128,balanced,0.21599467595418295
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,1,128,balanced,0.2674559950828552
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,2,2,balanced,0.043621331453323364
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,2,2,balanced,0.043893332282702126
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,2,2,balanced,0.05407466491063436
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,2,2,balanced,0.07863999903202057
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,2,2,balanced,0.11645866433779399
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,2,2,balanced,0.17785600821177164
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,2,2,balanced,0.18010665973027548
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,2,2,balanced,0.18106667200724283
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,2,2,balanced,0.18226132790247598
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,2,2,balanced,0.1830880045890808
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,2,2,balanced,0.18744534254074097
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,2,2,balanced,0.18998400370279947
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,2,2,balanced,0.18826667467753092
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,2,2,balanced,0.19698667526245117
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,2,2,balanced,0.2034133275349935
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,2,2,balanced,0.20600533485412598
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,2,2,balanced,0.21267733971277872
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,2,2,balanced,0.22988800207773843
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,2,2,balanced,0.24291733900705972
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,2,2,balanced,0.27874666452407837
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,2,2,balanced,0.3083786765734355
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,2,2,balanced,0.3910986582438151
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,2,2,balanced,0.45178667704264325
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,2,2,balanced,0.6111093362172445
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,2,2,balanced,0.7320373058319092
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,2,2,balanced,1.0468213558197021
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,2,2,balanced,1.3119626839955647
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,2,16,power_law_1.01,0.04580479860305786
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,2,16,power_law_1.01,0.06255360245704651
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,2,16,power_law_1.01,0.05469440221786499
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,2,16,power_law_1.01,0.06085119843482971
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,2,16,power_law_1.01,0.06213120222091675
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,2,16,power_law_1.01,0.060524797439575194
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,2,16,power_law_1.01,0.059334397315979004
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,2,16,power_law_1.01,0.05627520084381103
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,2,16,power_law_1.01,0.059724801778793336
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,2,16,power_law_1.01,0.060844802856445314
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,2,16,power_law_1.01,0.058956801891326904
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,2,16,power_law_1.01,0.06117759943008423
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,2,16,power_law_1.01,0.06272000074386597
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,2,16,power_law_1.01,0.06741120219230652
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,2,16,power_law_1.01,0.07422080039978027
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,2,16,power_law_1.01,0.07850880026817322
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,2,16,power_law_1.01,0.08131840229034423
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,2,16,power_law_1.01,0.09177600145339966
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,2,16,power_law_1.01,0.09571840167045594
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,2,16,power_law_1.01,0.11400959491729737
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,2,16,power_law_1.01,0.1248128056526184
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,2,16,power_law_1.01,0.15671039819717408
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,2,16,power_law_1.01,0.1922943949699402
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,2,16,power_law_1.01,0.2581439971923828
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,2,16,power_law_1.01,0.3103231906890869
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,2,16,power_law_1.01,0.46854400634765625
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,2,16,power_law_1.01,0.5812672138214111
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,1,32,power_law_1.2,0.024505600333213806
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,1,32,power_law_1.2,0.023814399540424348
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,1,32,power_law_1.2,0.02359039932489395
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,1,32,power_law_1.2,0.023238399624824525
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,1,32,power_law_1.2,0.02388480007648468
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,1,32,power_law_1.2,0.026502400636672974
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,1,32,power_law_1.2,0.03696640133857727
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,1,32,power_law_1.2,0.03464959859848023
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,1,32,power_law_1.2,0.05086719989776611
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,1,32,power_law_1.2,0.05189120173454285
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,1,32,power_law_1.2,0.04981760084629059
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,1,32,power_law_1.2,0.054771202802658084
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,1,32,power_law_1.2,0.054636800289154054
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,1,32,power_law_1.2,0.05438079833984375
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,1,32,power_law_1.2,0.054476797580718994
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,1,32,power_law_1.2,0.05324159860610962
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,1,32,power_law_1.2,0.05555840134620667
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,1,32,power_law_1.2,0.05139840245246887
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,1,32,power_law_1.2,0.053958398103713986
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,1,32,power_law_1.2,0.06320639848709106
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,1,32,power_law_1.2,0.07035520076751708
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,1,32,power_law_1.2,0.08994560241699219
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,1,32,power_law_1.2,0.10424319505691529
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,1,32,power_law_1.2,0.14487040042877197
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,1,32,power_law_1.2,0.18622080087661744
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,1,32,power_law_1.2,0.27295360565185545
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,1,32,power_law_1.2,0.34339199066162107
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,2,32,balanced,0.04177066683769226
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,2,32,balanced,0.043061330914497375
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,2,32,balanced,0.0421013335386912
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,2,32,balanced,0.04594666759173075
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,2,32,balanced,0.048010667165120445
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,2,32,balanced,0.04839999973773956
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,2,32,balanced,0.05060266455014547
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,2,32,balanced,0.053786665201187134
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,2,32,balanced,0.050154666105906166
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,2,32,balanced,0.053914666175842285
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,2,32,balanced,0.05380799869696299
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,2,32,balanced,0.05806933343410492
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,2,32,balanced,0.05872533222039541
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,2,32,balanced,0.06032533446947733
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,2,32,balanced,0.0605973352988561
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,2,32,balanced,0.06029333174228668
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,2,32,balanced,0.07007466753323872
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,2,32,balanced,0.0743999977906545
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,2,32,balanced,0.08071466783682506
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,2,32,balanced,0.09141866366068523
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,2,32,balanced,0.10097066561381023
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,2,32,balanced,0.12484799822171529
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,2,32,balanced,0.14653333028157553
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,2,32,balanced,0.19210133949915567
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,2,32,balanced,0.23282132546106973
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,2,32,balanced,0.32662399609883624
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,2,32,balanced,0.40850667158762616
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,2,64,power_law_1.01,0.04726400077342987
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,2,64,power_law_1.01,0.04297600090503693
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,2,64,power_law_1.01,0.04159359931945801
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,2,64,power_law_1.01,0.043968001008033754
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,2,64,power_law_1.01,0.0438400000333786
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,2,64,power_law_1.01,0.044198399782180785
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,2,64,power_law_1.01,0.044435200095176694
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,2,64,power_law_1.01,0.04560000002384186
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,2,64,power_law_1.01,0.046028798818588255
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,2,64,power_law_1.01,0.04590719938278198
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,2,64,power_law_1.01,0.047142401337623596
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,2,64,power_law_1.01,0.04821119904518127
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,2,64,power_law_1.01,0.04876160025596619
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,2,64,power_law_1.01,0.050457602739334105
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,2,64,power_law_1.01,0.055302399396896365
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,2,64,power_law_1.01,0.05571200251579285
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,2,64,power_law_1.01,0.0603007972240448
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,2,64,power_law_1.01,0.06625919938087463
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,2,64,power_law_1.01,0.07191039919853211
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,2,64,power_law_1.01,0.08335999846458435
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,2,64,power_law_1.01,0.0917952001094818
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,2,64,power_law_1.01,0.12227840423583984
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,2,64,power_law_1.01,0.1523200035095215
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,2,64,power_law_1.01,0.2029439926147461
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,2,64,power_law_1.01,0.263155198097229
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,2,64,power_law_1.01,0.3742847919464111
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,2,64,power_law_1.01,0.44355201721191406
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,1,16,power_law_1.2,0.0690559983253479
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,1,16,power_law_1.2,0.09014400243759155
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,1,16,power_law_1.2,0.08480640053749085
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,1,16,power_law_1.2,0.08863999843597412
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,1,16,power_law_1.2,0.09191039800643921
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,1,16,power_law_1.2,0.07864320278167725
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,1,16,power_law_1.2,0.09204480051994324
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,1,16,power_law_1.2,0.09256319999694824
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,1,16,power_law_1.2,0.08773760199546814
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,1,16,power_law_1.2,0.09094399809837342
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,1,16,power_law_1.2,0.09242240190505982
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,1,16,power_law_1.2,0.09367039799690247
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,1,16,power_law_1.2,0.09450240135192871
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,1,16,power_law_1.2,0.09642879962921143
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,1,16,power_law_1.2,0.10290559530258178
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,1,16,power_law_1.2,0.10358400344848633
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,1,16,power_law_1.2,0.10807679891586304
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,1,16,power_law_1.2,0.11608959436416626
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,1,16,power_law_1.2,0.12318079471588135
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,1,16,power_law_1.2,0.1534719944000244
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,1,16,power_law_1.2,0.17066240310668945
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,1,16,power_law_1.2,0.22501759529113768
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,1,16,power_law_1.2,0.26871039867401125
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,1,16,power_law_1.2,0.4045440196990967
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,1,16,power_law_1.2,0.4909823894500732
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,1,16,power_law_1.2,0.7679488182067871
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,1,16,power_law_1.2,0.9630592346191407
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,1,8,power_law_1.2,0.07397760152816772
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,1,8,power_law_1.2,0.09485440254211426
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,1,8,power_law_1.2,0.09168639779090881
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,1,8,power_law_1.2,0.10209920406341552
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,1,8,power_law_1.2,0.12580480575561523
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,1,8,power_law_1.2,0.12426879405975341
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,1,8,power_law_1.2,0.12732800245285034
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,1,8,power_law_1.2,0.12313599586486816
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,1,8,power_law_1.2,0.12799999713897706
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,1,8,power_law_1.2,0.1260159969329834
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,1,8,power_law_1.2,0.12821760177612304
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,1,8,power_law_1.2,0.12776319980621337
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,1,8,power_law_1.2,0.131769597530365
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,1,8,power_law_1.2,0.1335935950279236
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,1,8,power_law_1.2,0.1407807946205139
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,1,8,power_law_1.2,0.14227839708328247
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,1,8,power_law_1.2,0.15323519706726074
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,1,8,power_law_1.2,0.1674496054649353
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,1,8,power_law_1.2,0.17479679584503174
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,1,8,power_law_1.2,0.20451200008392334
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,1,8,power_law_1.2,0.23423359394073487
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,1,8,power_law_1.2,0.28721280097961427
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,1,8,power_law_1.2,0.34583680629730223
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,1,8,power_law_1.2,0.5018432140350342
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,1,8,power_law_1.2,0.6022079944610595
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,1,8,power_law_1.2,0.9513664245605469
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,1,8,power_law_1.2,1.1716927528381347
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,1,128,power_law_1.01,0.018361599743366243
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,1,128,power_law_1.01,0.02033279985189438
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,1,128,power_law_1.01,0.017990399897098542
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,1,128,power_law_1.01,0.01976960003376007
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,1,128,power_law_1.01,0.023347200453281404
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,1,128,power_law_1.01,0.036006399989128114
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,1,128,power_law_1.01,0.033107200264930726
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,1,128,power_law_1.01,0.033241599798202515
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,1,128,power_law_1.01,0.03343360126018524
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,1,128,power_law_1.01,0.033580800890922545
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,1,128,power_law_1.01,0.03356159925460815
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,1,128,power_law_1.01,0.033817601203918454
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,1,128,power_law_1.01,0.033766400814056394
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,1,128,power_law_1.01,0.0320576012134552
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,1,128,power_law_1.01,0.03210879862308502
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,1,128,power_law_1.01,0.032332798838615416
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,1,128,power_law_1.01,0.03625600039958954
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,1,128,power_law_1.01,0.03573119938373566
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,1,128,power_law_1.01,0.039027199149131775
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,1,128,power_law_1.01,0.05133439898490906
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,1,128,power_law_1.01,0.0536191999912262
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,1,128,power_law_1.01,0.06783999800682068
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,1,128,power_law_1.01,0.08271999955177307
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,1,128,power_law_1.01,0.11114239692687988
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,1,128,power_law_1.01,0.14169600009918212
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,1,128,power_law_1.01,0.20049920082092285
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,1,128,power_law_1.01,0.25791358947753906
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,8,8,balanced,0.04797333478927612
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,8,8,balanced,0.05051200091838837
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,8,8,balanced,0.052095999320348106
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,8,8,balanced,0.056143999099731445
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,8,8,balanced,0.06434666613737743
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,8,8,balanced,0.07743466893831889
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,8,8,balanced,0.07870933413505554
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,8,8,balanced,0.07858133316040039
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,8,8,balanced,0.07702399790287018
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,8,8,balanced,0.0779306689898173
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,8,8,balanced,0.07852800190448761
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,8,8,balanced,0.07970666885375977
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,8,8,balanced,0.07698666552702586
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,8,8,balanced,0.0804319977760315
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,8,8,balanced,0.08442667126655579
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,8,8,balanced,0.08455999692281087
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,8,8,balanced,0.0897173285484314
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,8,8,balanced,0.09676266709963481
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,8,8,balanced,0.10203199585278828
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,8,8,balanced,0.11372799674669902
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,8,8,balanced,0.1275146702925364
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,8,8,balanced,0.15213333566983542
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,8,8,balanced,0.1811786691347758
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,8,8,balanced,0.24812267223993936
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,8,8,balanced,0.2974613308906555
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,8,8,balanced,0.4245813290278117
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,8,8,balanced,0.5380693276723226
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,4,1,power_law_1.01,0.01879040002822876
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,4,1,power_law_1.01,0.02014079988002777
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,4,1,power_law_1.01,0.026419198513031004
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,4,1,power_law_1.01,0.03578880131244659
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,4,1,power_law_1.01,0.04725759923458099
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,4,1,power_law_1.01,0.06017919778823853
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,4,1,power_law_1.01,0.07022719979286193
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,4,1,power_law_1.01,0.07492480278015137
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,4,1,power_law_1.01,0.07653120160102844
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,4,1,power_law_1.01,0.07998719811439514
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,4,1,power_law_1.01,0.0828544020652771
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,4,1,power_law_1.01,0.0813759982585907
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,4,1,power_law_1.01,0.08752639889717102
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,4,1,power_law_1.01,0.08869119882583618
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,4,1,power_law_1.01,0.0942911982536316
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,4,1,power_law_1.01,0.09852799773216248
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,4,1,power_law_1.01,0.10990719795227051
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,4,1,power_law_1.01,0.12002559900283813
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,4,1,power_law_1.01,0.12938239574432372
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,4,1,power_law_1.01,0.15434880256652833
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,4,1,power_law_1.01,0.17802239656448365
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,4,1,power_law_1.01,0.22254080772399903
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,4,1,power_law_1.01,0.20581119060516356
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,4,1,power_law_1.01,0.2682368040084839
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,4,1,power_law_1.01,0.3296447992324829
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,4,1,power_law_1.01,0.45776638984680174
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,4,1,power_law_1.01,0.5844480037689209
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,1,128,balanced,0.039093332986036934
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,1,128,balanced,0.039808000127474465
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,1,128,balanced,0.03998400022586187
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,1,128,balanced,0.04188799858093262
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,1,128,balanced,0.0439573327700297
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,1,128,balanced,0.04471466441949209
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,4,8,balanced,0.047797332207361855
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,4,8,balanced,0.0498879998922348
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,4,8,balanced,0.05198933184146881
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,4,8,balanced,0.05815466741720835
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,4,8,balanced,0.07034666836261749
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,4,8,balanced,0.08711466193199158
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,4,8,balanced,0.0860640009244283
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,4,8,balanced,0.08472533027331035
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,4,8,balanced,0.0862613320350647
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,4,8,balanced,0.0845973292986552
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,4,8,balanced,0.08403733372688293
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,4,8,balanced,0.08456533153851827
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,4,8,balanced,0.0846560001373291
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,4,8,balanced,0.08490133285522461
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,4,8,balanced,0.08733333150545756
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,4,8,balanced,0.09078400333722432
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,4,8,balanced,0.09552533427874248
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,4,8,balanced,0.10101866722106934
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,4,8,balanced,0.10929600397745769
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,4,8,balanced,0.12230400244394939
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,4,8,balanced,0.13523733615875244
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,4,8,balanced,0.1604106624921163
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,4,8,balanced,0.18844266732533774
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,4,8,balanced,0.25837866465250653
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,4,8,balanced,0.30718932549158734
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,4,8,balanced,0.442410667737325
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,4,8,balanced,0.5648159980773926
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,1,128,balanced,0.0473280002673467
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,1,128,balanced,0.048581331968307495
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,1,1,power_law_1.2,0.08262400031089782
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,1,1,power_law_1.2,0.11627520322799682
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,1,1,power_law_1.2,0.17180800437927246
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,1,1,power_law_1.2,0.2662528038024902
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,1,1,power_law_1.2,0.36778879165649414
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,1,1,power_law_1.2,0.505024003982544
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,1,1,power_law_1.2,0.7156991958618164
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,1,1,power_law_1.2,0.788044786453247
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,1,1,power_law_1.2,0.7910463809967041
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,1,1,power_law_1.2,0.8178624153137207
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,1,1,power_law_1.2,0.8478079795837402
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,1,1,power_law_1.2,0.8953087806701661
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,1,1,power_law_1.2,0.9102911949157715
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,1,1,power_law_1.2,0.9317119598388672
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,1,1,power_law_1.2,0.9600064277648925
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,1,1,power_law_1.2,0.9834943771362304
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,1,1,power_law_1.2,1.05862398147583
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,1,1,power_law_1.2,1.1620800018310546
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,1,1,power_law_1.2,1.132588768005371
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,1,1,power_law_1.2,1.2694016456604005
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,1,1,power_law_1.2,1.2920703887939453
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,1,1,power_law_1.2,1.488704013824463
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,1,1,power_law_1.2,1.669536018371582
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,1,1,power_law_1.2,2.0036544799804688
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,1,1,power_law_1.2,2.275609588623047
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,1,1,power_law_1.2,2.8973247528076174
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,1,1,power_law_1.2,3.542950439453125
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,1,128,balanced,0.04809600114822388
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,1,128,balanced,0.04994133114814758
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,2,2,power_law_1.01,0.0573248028755188
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,2,2,power_law_1.01,0.07780479788780212
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,2,2,power_law_1.01,0.08855680227279664
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,2,2,power_law_1.01,0.12602239847183228
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,2,2,power_law_1.01,0.15314559936523436
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,2,2,power_law_1.01,0.17666560411453247
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,2,2,power_law_1.01,0.24040958881378174
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,2,2,power_law_1.01,0.22903680801391602
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,2,2,power_law_1.01,0.2545023918151855
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,2,2,power_law_1.01,0.2432960033416748
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,2,2,power_law_1.01,0.2529536008834839
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,2,2,power_law_1.01,0.26797440052032473
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,2,2,power_law_1.01,0.26746881008148193
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,2,2,power_law_1.01,0.2834815979003906
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,2,2,power_law_1.01,0.28709120750427247
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,2,2,power_law_1.01,0.2725311994552612
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,2,2,power_law_1.01,0.3024512052536011
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,2,2,power_law_1.01,0.3314815998077393
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,2,2,power_law_1.01,0.3384768009185791
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,2,2,power_law_1.01,0.3853823900222778
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,2,2,power_law_1.01,0.4049536228179932
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,2,2,power_law_1.01,0.47560319900512693
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,1,128,balanced,0.05213866631189982
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,1,128,balanced,0.05417066812515259
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,1,128,balanced,0.051914667089780174
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,1,128,balanced,0.05460800230503082
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,1,128,balanced,0.060133333007494606
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,1,128,balanced,0.0609386662642161
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,1,128,balanced,0.06436799963315327
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,1,128,balanced,0.06924800078074138
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,1,128,balanced,0.07450133562088013
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,1,128,balanced,0.08494933446248372
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,1,128,balanced,0.09070932865142822
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,1,128,balanced,0.11762666702270508
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,1,128,balanced,0.13396799564361572
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,1,128,balanced,0.1751520037651062
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,1,128,balanced,0.21480000019073486
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,1,128,balanced,0.2981013258298238
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,1,128,balanced,0.37785065174102783
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,2,2,power_law_1.01,0.5415616035461426
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,2,2,power_law_1.01,0.6840191841125488
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,2,2,power_law_1.01,0.8492351531982422
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,2,2,power_law_1.01,1.139731216430664
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,2,2,power_law_1.01,1.4073344230651856
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,2,2,balanced,0.021242665747801464
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,2,2,balanced,0.023503998915354412
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,2,2,balanced,0.027477333943049114
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,2,2,balanced,0.04063999901215235
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,2,2,balanced,0.06029866635799408
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,2,2,balanced,0.09315733114878337
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,2,2,balanced,0.09505066275596619
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,2,2,balanced,0.09684266646703084
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,2,2,balanced,0.09987200299898784
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,2,2,balanced,0.10261332988739014
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,2,2,balanced,0.10462933778762817
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,2,2,balanced,0.10708799958229065
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,2,2,balanced,0.11085866888364156
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,2,2,balanced,0.11125333110491435
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,2,2,balanced,0.11724266409873962
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,2,2,balanced,0.12215466300646464
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,2,2,balanced,0.1372213363647461
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,2,2,balanced,0.14724266529083252
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,2,2,balanced,0.14443733294804892
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,2,2,balanced,0.1932106614112854
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,2,2,balanced,0.16890132427215576
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,2,2,balanced,0.27264533440272015
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,2,2,balanced,0.3234986662864685
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,2,2,balanced,0.30898133913675946
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,2,2,balanced,0.31727999448776245
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,2,2,balanced,0.5756053527196249
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,2,2,balanced,0.5636213223139445
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,16,2,power_law_1.2,0.044870400428771974
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,16,2,power_law_1.2,0.04694400131702423
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,16,2,power_law_1.2,0.04839679896831513
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,16,2,power_law_1.2,0.054553598165512085
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,16,2,power_law_1.2,0.05998079776763916
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,16,2,power_law_1.2,0.06563839912414551
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,16,2,power_law_1.2,0.07223039865493774
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,16,2,power_law_1.2,0.0759552001953125
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,16,2,power_law_1.2,0.0769919991493225
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,16,2,power_law_1.2,0.07713919878005981
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,16,2,power_law_1.2,0.08208000063896179
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,16,2,power_law_1.2,0.08432639837265014
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,16,2,power_law_1.2,0.0876800000667572
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,16,2,power_law_1.2,0.0938368022441864
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,16,2,power_law_1.2,0.1047551989555359
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,16,2,power_law_1.2,0.1078336000442505
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,16,2,power_law_1.2,0.1130687952041626
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,16,2,power_law_1.2,0.13070080280303956
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,16,2,power_law_1.2,0.14521600008010865
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,16,2,power_law_1.2,0.18332159519195557
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,16,2,power_law_1.2,0.2176192045211792
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,16,2,power_law_1.2,0.2776959896087646
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,16,2,power_law_1.2,0.3422399997711182
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,16,2,power_law_1.2,0.49781122207641604
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,16,2,power_law_1.2,0.6116352081298828
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,16,2,power_law_1.2,0.901420783996582
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,16,2,power_law_1.2,1.1215871810913085
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,16,8,balanced,0.04053333401679993
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,16,8,balanced,0.0418453315893809
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,16,8,balanced,0.04203199843565623
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,16,8,balanced,0.04433066646258036
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,16,8,balanced,0.04834666848182678
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,16,8,balanced,0.05203733344872793
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,16,8,balanced,0.054010664423306785
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,16,8,balanced,0.05429333448410034
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,16,8,balanced,0.055071999629338585
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,16,8,balanced,0.05423999826113383
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,16,8,balanced,0.05602666735649109
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,16,8,balanced,0.056405335664749146
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,16,8,balanced,0.05596266686916351
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,16,8,balanced,0.05821866790453593
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,16,8,balanced,0.06246933341026306
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,16,8,balanced,0.06283199787139893
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,16,8,balanced,0.06694399813810985
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,16,8,balanced,0.0749013324578603
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,16,8,balanced,0.07678933441638947
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,16,8,balanced,0.09344533085823059
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,16,8,balanced,0.10107200344403584
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,16,8,balanced,0.13470932841300964
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,16,8,balanced,0.17268266280492148
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,16,8,balanced,0.23843199014663696
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,16,8,balanced,0.29121599594751996
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,16,8,balanced,0.4172000090281169
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,16,8,balanced,0.5245813528696696
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,4,8,power_law_1.01,0.06034560203552246
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,4,8,power_law_1.01,0.06949120163917541
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,4,8,power_law_1.01,0.07273600101470948
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,4,8,power_law_1.01,0.08247680068016053
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,4,8,power_law_1.01,0.08229759931564332
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,4,8,power_law_1.01,0.08155519962310791
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,4,8,power_law_1.01,0.08236799836158752
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,4,8,power_law_1.01,0.08206080198287964
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,4,8,power_law_1.01,0.07946879863739013
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,4,8,power_law_1.01,0.08414080142974853
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,4,8,power_law_1.01,0.08399999737739564
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,4,8,power_law_1.01,0.082259202003479
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,4,8,power_law_1.01,0.08565760254859925
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,4,8,power_law_1.01,0.08788480162620545
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,4,8,power_law_1.01,0.09210879802703857
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,4,8,power_law_1.01,0.09193599820137024
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,4,8,power_law_1.01,0.09552639722824097
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,4,8,power_law_1.01,0.10421760082244873
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,4,8,power_law_1.01,0.11568000316619872
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,4,8,power_law_1.01,0.12963839769363403
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,4,8,power_law_1.01,0.15270400047302246
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,4,8,power_law_1.01,0.1877120018005371
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,4,8,power_law_1.01,0.22724480628967286
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,4,8,power_law_1.01,0.30424959659576417
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,2,1,power_law_1.2,0.08963199853897094
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,4,8,power_law_1.01,0.3852992057800293
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,2,1,power_law_1.2,0.1018496036529541
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,4,8,power_law_1.01,0.5373824119567872
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,2,1,power_law_1.2,0.1282047986984253
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,4,8,power_law_1.01,0.6709248065948487
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,2,1,power_law_1.2,0.1726591944694519
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,2,1,power_law_1.2,0.20451838970184327
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,2,1,power_law_1.2,0.2522495985031128
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,2,1,power_law_1.2,0.32743039131164553
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,2,1,power_law_1.2,0.3439039945602417
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,2,1,power_law_1.2,0.3542207956314087
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,2,1,power_law_1.2,0.3530879974365234
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,2,1,power_law_1.2,0.3626431941986084
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,2,1,power_law_1.2,0.37496318817138674
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,2,1,power_law_1.2,0.37927680015563964
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,2,1,power_law_1.2,0.3884160041809082
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,2,1,power_law_1.2,0.4015679836273193
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,2,1,power_law_1.2,0.3992448091506958
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,2,1,power_law_1.2,0.4288320064544678
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,2,1,power_law_1.2,0.46715521812438965
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,2,1,power_law_1.2,0.5127295970916748
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,2,1,power_law_1.2,0.5977856159210205
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,2,1,power_law_1.2,0.6764287948608398
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,2,1,power_law_1.2,0.8324095726013183
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,2,1,power_law_1.2,0.9825728416442872
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,2,1,power_law_1.2,1.2767487525939942
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,2,1,power_law_1.2,1.5766207695007324
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,2,1,power_law_1.2,2.175923156738281
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,2,1,power_law_1.2,2.7750911712646484
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,8,16,power_law_1.01,0.01764480024576187
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,8,16,power_law_1.01,0.018297599256038667
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,8,16,power_law_1.01,0.01820160001516342
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,8,16,power_law_1.01,0.01850239932537079
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,8,16,power_law_1.01,0.018617600202560425
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,8,16,power_law_1.01,0.020927999913692475
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,8,16,power_law_1.01,0.020582400262355804
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,8,16,power_law_1.01,0.020550400018692017
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,8,16,power_law_1.01,0.025382399559020996
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,8,16,power_law_1.01,0.025523200631141663
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,8,16,power_law_1.01,0.025990399718284606
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,8,16,power_law_1.01,0.03861759901046753
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,8,16,power_law_1.01,0.038176000118255615
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,8,16,power_law_1.01,0.038649600744247434
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,8,16,power_law_1.01,0.038438400626182555
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,8,16,power_law_1.01,0.03988479971885681
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,8,16,power_law_1.01,0.04156799912452698
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,8,16,power_law_1.01,0.042668798565864564
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,8,16,power_law_1.01,0.04346239864826203
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,8,16,power_law_1.01,0.04766719937324524
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,8,16,power_law_1.01,0.050406402349472045
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,8,16,power_law_1.01,0.06216959953308106
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,8,16,power_law_1.01,0.0760640025138855
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,8,16,power_law_1.01,0.09463040232658386
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,8,16,power_law_1.01,0.11216000318527222
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,8,16,power_law_1.01,0.15957759618759154
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,8,16,power_law_1.01,0.19838080406188965
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,4,2,balanced,0.06651199857393901
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,4,2,balanced,0.06909866631031036
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,4,2,balanced,0.07919999957084656
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,4,2,balanced,0.09525332848230998
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,4,2,balanced,0.1360106666882833
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,4,2,balanced,0.19367466370264688
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,4,2,balanced,0.18953067064285278
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,4,2,balanced,0.18453333775202432
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,4,2,balanced,0.18745599190394083
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,4,2,balanced,0.18461867173512778
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,4,2,balanced,0.18203200896581015
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,4,2,balanced,0.18046400944391885
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,4,2,balanced,0.18125333388646445
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,4,2,balanced,0.19005866845448813
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,4,2,balanced,0.18997865915298462
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,4,2,balanced,0.192522664864858
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,4,2,balanced,0.20388267437616983
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,4,2,balanced,0.21570134162902832
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,4,2,balanced,0.2321173350016276
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,4,2,balanced,0.2605546712875366
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,4,2,balanced,0.29046400388081867
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,4,2,balanced,0.3547786474227905
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,4,2,balanced,0.42139732837677
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,4,2,balanced,0.6315360069274902
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,4,2,balanced,0.760042667388916
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,4,2,balanced,1.1049919923146565
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,4,2,balanced,1.4172159830729167
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,1,2,power_law_1.2,0.024006399512290954
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,1,2,power_law_1.2,0.0358271986246109
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,1,2,power_law_1.2,0.047731199860572816
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,1,2,power_law_1.2,0.06512640118598938
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,1,2,power_law_1.2,0.0769599974155426
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,1,2,power_law_1.2,0.09957759976387023
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,1,2,power_law_1.2,0.1335744023323059
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,1,2,power_law_1.2,0.12711679935455322
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,1,2,power_law_1.2,0.12486399412155151
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,1,2,power_law_1.2,0.13264000415802002
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,1,2,power_law_1.2,0.139136004447937
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,1,2,power_law_1.2,0.14642560482025146
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,1,2,power_law_1.2,0.1466431975364685
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,1,2,power_law_1.2,0.15470080375671386
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,1,2,power_law_1.2,0.16634880304336547
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,1,2,power_law_1.2,0.17469439506530762
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,1,2,power_law_1.2,0.18420480489730834
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,1,2,power_law_1.2,0.20953600406646727
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,1,2,power_law_1.2,0.2091007947921753
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,1,2,power_law_1.2,0.2580672025680542
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,1,2,power_law_1.2,0.2500096082687378
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,1,2,power_law_1.2,0.3117248058319092
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,1,2,power_law_1.2,0.34108800888061525
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,1,2,power_law_1.2,0.4250175952911377
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,1,2,power_law_1.2,0.4887231826782227
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,1,2,power_law_1.2,0.6465536117553711
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,1,2,power_law_1.2,0.8107135772705079
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,4,2,balanced,0.08912533521652222
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,4,2,balanced,0.09075733025868733
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,4,2,balanced,0.09556266665458679
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,4,2,balanced,0.10775466759999593
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,4,2,balanced,0.13121599952379862
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,4,2,balanced,0.1665386656920115
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,4,2,balanced,0.16766399145126343
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,4,2,balanced,0.17139200369517008
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,4,2,balanced,0.17150400082270303
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,4,2,balanced,0.1750239928563436
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,4,2,balanced,0.1742186745007833
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,4,2,balanced,0.17748266458511353
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,4,2,balanced,0.17921600739161173
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,4,2,balanced,0.18248534202575684
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,4,2,balanced,0.1829493244489034
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,4,2,balanced,0.18528532981872559
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,4,2,balanced,0.19194666544596353
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,4,2,balanced,0.20639467239379883
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,4,2,balanced,0.22023467222849527
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,4,2,balanced,0.2516000072161357
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,4,2,balanced,0.2799893418947856
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,4,2,balanced,0.33930134773254395
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,4,2,balanced,0.4019786516825358
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,4,2,balanced,0.573365330696106
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,4,2,balanced,0.6950346628824869
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,4,2,balanced,1.0013386408487956
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,4,2,balanced,1.3089120388031006
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,1,16,power_law_1.2,0.08329600095748901
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,1,16,power_law_1.2,0.10817919969558716
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,1,16,power_law_1.2,0.09523199796676636
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,1,16,power_law_1.2,0.10728960037231446
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,1,16,power_law_1.2,0.11241600513458253
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,1,16,power_law_1.2,0.09013760089874268
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,1,16,power_law_1.2,0.1003648042678833
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,1,16,power_law_1.2,0.10189440250396728
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,1,16,power_law_1.2,0.1028480052947998
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,1,16,power_law_1.2,0.09976959824562073
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,1,16,power_law_1.2,0.10382720232009887
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,1,16,power_law_1.2,0.1069375991821289
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,1,16,power_law_1.2,0.10430719852447509
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,1,16,power_law_1.2,0.10773760080337524
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,1,16,power_law_1.2,0.11493120193481446
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,1,16,power_law_1.2,0.11669759750366211
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,1,16,power_law_1.2,0.12546559572219848
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,1,16,power_law_1.2,0.14033279418945313
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,1,16,power_law_1.2,0.14595199823379518
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,1,16,power_law_1.2,0.17301119565963746
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,1,16,power_law_1.2,0.18175359964370727
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,1,16,power_law_1.2,0.23962879180908203
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,1,16,power_law_1.2,0.24586238861083984
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,1,16,power_law_1.2,0.32448639869689944
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,1,16,power_law_1.2,0.3869760036468506
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,1,16,power_law_1.2,0.5748223781585693
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,1,16,power_law_1.2,0.6603968143463135
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,16,4,balanced,0.04159999887148539
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,16,4,balanced,0.041989331444104515
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,16,4,balanced,0.043840001026789345
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,16,4,balanced,0.04610666632652283
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,16,4,balanced,0.053823997577031456
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,16,4,balanced,0.06177066763242086
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,16,4,balanced,0.062496001521746315
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,16,4,balanced,0.06399466594060262
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,16,4,balanced,0.0641653339068095
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,16,4,balanced,0.06400533517201741
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,16,4,balanced,0.06258666515350342
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,16,4,balanced,0.06429333488146464
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,16,4,balanced,0.06422933439413707
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,16,4,balanced,0.0687253326177597
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,16,4,balanced,0.0731573353211085
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,16,4,balanced,0.07438399891058604
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,16,4,balanced,0.08041599889596303
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,16,4,balanced,0.08917333682378133
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,16,4,balanced,0.0941386620203654
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,16,4,balanced,0.11517866452534993
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,16,4,balanced,0.1405280033747355
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,16,4,balanced,0.18514132499694824
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,16,4,balanced,0.21567465861638388
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,16,4,balanced,0.3017759919166565
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,16,4,balanced,0.37852799892425537
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,16,4,balanced,0.5548640092213949
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,16,4,balanced,0.7072640260060629
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,4,64,power_law_1.2,0.040403199195861814
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,4,64,power_law_1.2,0.04078719913959503
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,4,64,power_law_1.2,0.040524798631668094
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,4,64,power_law_1.2,0.043750399351119997
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,4,64,power_law_1.2,0.043993601202964784
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,4,64,power_law_1.2,0.045203199982643126
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,4,64,power_law_1.2,0.045900800824165346
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,4,64,power_law_1.2,0.04715520143508911
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,4,64,power_law_1.2,0.04809600114822388
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,4,64,power_law_1.2,0.04769920110702515
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,4,64,power_law_1.2,0.0482367992401123
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,4,64,power_law_1.2,0.048332801461219786
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,4,64,power_law_1.2,0.04849919974803925
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,4,64,power_law_1.2,0.05140479803085327
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,4,64,power_law_1.2,0.05685120224952698
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,4,64,power_law_1.2,0.05740799903869629
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,4,64,power_law_1.2,0.06266239881515503
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,4,64,power_law_1.2,0.06741759777069092
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,4,64,power_law_1.2,0.07596160173416137
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,4,64,power_law_1.2,0.09253759980201721
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,4,64,power_law_1.2,0.11319680213928222
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,4,64,power_law_1.2,0.14465919733047486
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,4,64,power_law_1.2,0.18017280101776123
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,4,64,power_law_1.2,0.2500416040420532
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,4,64,power_law_1.2,0.31825919151306153
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,4,64,power_law_1.2,0.4941567897796631
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,4,64,power_law_1.2,0.7258815765380859
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,1,1,power_law_1.01,0.0239424005150795
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,1,1,power_law_1.01,0.037171199917793274
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,1,1,power_law_1.01,0.05408639907836914
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,1,1,power_law_1.01,0.08119040131568908
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,1,1,power_law_1.01,0.11215360164642334
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,1,1,power_law_1.01,0.15639679431915282
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,1,1,power_law_1.01,0.19680000543594361
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,1,1,power_law_1.01,0.210316801071167
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,1,1,power_law_1.01,0.21816959381103515
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,1,1,power_law_1.01,0.231660795211792
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,1,1,power_law_1.01,0.23388800621032715
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,1,1,power_law_1.01,0.24280960559844972
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,1,1,power_law_1.01,0.24635519981384277
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,1,1,power_law_1.01,0.25505919456481935
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,1,1,power_law_1.01,0.26885759830474854
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,1,1,power_law_1.01,0.2665152072906494
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,1,1,power_law_1.01,0.2892735958099365
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,1,1,power_law_1.01,0.3236991882324219
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,1,1,power_law_1.01,0.33292160034179685
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,1,1,power_law_1.01,0.4083456039428711
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,1,1,power_law_1.01,0.41843838691711427
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,1,1,power_law_1.01,0.5285568237304688
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,1,1,power_law_1.01,0.5034751892089844
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,1,1,power_law_1.01,0.6481215953826904
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,1,1,power_law_1.01,0.8143936157226562
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,1,1,power_law_1.01,1.123020839691162
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,1,1,power_law_1.01,1.444326400756836
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,4,8,power_law_1.2,0.04748159945011139
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,4,8,power_law_1.2,0.06047359704971313
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,4,8,power_law_1.2,0.056403201818466184
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,4,8,power_law_1.2,0.06507520079612732
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,4,8,power_law_1.2,0.0759872019290924
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,4,8,power_law_1.2,0.0763264000415802
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,4,8,power_law_1.2,0.07328640222549439
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,4,8,power_law_1.2,0.07269759774208069
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,4,8,power_law_1.2,0.07349119782447815
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,4,8,power_law_1.2,0.07192320227622986
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,4,8,power_law_1.2,0.07502080202102661
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,4,8,power_law_1.2,0.07842559814453125
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,4,8,power_law_1.2,0.08291199803352356
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,4,8,power_law_1.2,0.08176640272140503
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,4,8,power_law_1.2,0.0895359992980957
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,4,8,power_law_1.2,0.09195520281791687
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,4,8,power_law_1.2,0.09703680276870727
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,4,8,power_law_1.2,0.10639359951019287
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,4,8,power_law_1.2,0.10796799659729003
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,4,8,power_law_1.2,0.1274880051612854
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,4,8,power_law_1.2,0.14279680252075194
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,4,8,power_law_1.2,0.18375680446624756
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,4,8,power_law_1.2,0.2108544111251831
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,4,8,power_law_1.2,0.27127039432525635
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,4,8,power_law_1.2,0.34013440608978274
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,4,8,power_law_1.2,0.4287231922149658
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,4,8,power_law_1.2,0.5972608089447021
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,2,8,balanced,0.04560533165931702
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,2,8,balanced,0.04582933088143667
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,2,8,balanced,0.04596266647179922
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,2,8,balanced,0.05195199946562449
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,2,8,balanced,0.058186665177345276
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,2,8,balanced,0.07507733503977458
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,2,8,balanced,0.07660266757011414
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,2,8,balanced,0.07692266503969829
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,2,8,balanced,0.07769066592057546
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,2,8,balanced,0.07760533193747203
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,2,8,balanced,0.08057599763075511
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,2,8,balanced,0.08132266501585643
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,2,8,balanced,0.08052266637484233
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,2,8,balanced,0.08469866712888081
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,2,8,balanced,0.08695466319719951
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,2,8,balanced,0.08987733721733093
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,2,8,balanced,0.09491200248400371
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,2,8,balanced,0.10115733742713928
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,2,8,balanced,0.10627200206120808
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,2,8,balanced,0.12071466445922852
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,2,8,balanced,0.1455413301785787
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,2,8,balanced,0.16801067193349203
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,2,8,balanced,0.19284266233444214
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,2,8,balanced,0.25617067019144696
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,2,8,balanced,0.30371199051539105
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,2,8,balanced,0.422757347424825
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,2,8,balanced,0.53330131371816
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,1,1,balanced,0.07406933108965556
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,1,1,balanced,0.10625599821408589
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,1,1,balanced,0.16619199514389038
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,1,1,balanced,0.29268266757329303
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,1,1,balanced,0.5373280048370361
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,1,1,balanced,1.0192053318023682
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,1,1,balanced,1.0241813659667969
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,1,1,balanced,1.0234239896138508
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,1,1,balanced,1.0327466328938801
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,1,1,balanced,1.031989336013794
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,1,1,balanced,1.0351253350575764
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,1,1,balanced,1.0456799666086833
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,1,1,balanced,1.0463146368662517
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,1,1,balanced,1.0552000204722087
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,1,1,balanced,1.0686559677124023
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,1,1,balanced,1.0715733369191487
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,1,1,balanced,1.085098663965861
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,1,1,balanced,1.1141760349273682
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,1,1,balanced,1.140544017155965
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,1,1,balanced,1.1943519910176594
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,1,1,balanced,1.262288014094035
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,1,1,balanced,1.3520906766255696
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,1,1,balanced,1.5156052907307942
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,1,1,balanced,1.8152532577514648
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,1,1,balanced,1.866389274597168
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,1,1,balanced,2.6361865997314453
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,1,1,balanced,2.9446773529052734
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,16,2,power_law_1.2,0.04252159893512726
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,16,2,power_law_1.2,0.04392960071563721
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,16,2,power_law_1.2,0.04585599899291992
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,16,2,power_law_1.2,0.05351679921150208
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,16,2,power_law_1.2,0.06268799901008607
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,16,2,power_law_1.2,0.06901119947433472
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,16,2,power_law_1.2,0.08456959724426269
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,16,2,power_law_1.2,0.08443520069122315
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,16,2,power_law_1.2,0.08633599877357483
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,16,2,power_law_1.2,0.09154559969902039
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,16,2,power_law_1.2,0.08946560025215149
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,16,2,power_law_1.2,0.09162880182266235
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,16,2,power_law_1.2,0.09338880181312562
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,16,2,power_law_1.2,0.10031360387802124
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,16,2,power_law_1.2,0.10782079696655274
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,16,2,power_law_1.2,0.11474560499191284
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,16,2,power_law_1.2,0.13370239734649658
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,16,2,power_law_1.2,0.14919040203094483
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,16,2,power_law_1.2,0.159660804271698
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,16,2,power_law_1.2,0.18462079763412476
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,16,2,power_law_1.2,0.2201535940170288
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,16,2,power_law_1.2,0.2823231935501099
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,16,2,power_law_1.2,0.35201919078826904
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,16,2,power_law_1.2,0.47246079444885253
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,16,2,power_law_1.2,0.5929088115692138
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,16,2,power_law_1.2,0.8291263580322266
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,16,2,power_law_1.2,1.1577664375305177
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,1,64,balanced,0.06029333174228668
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,1,64,balanced,0.04411733150482178
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,1,64,balanced,0.04196266829967499
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,1,64,balanced,0.04572266836961111
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,1,64,balanced,0.04602666695912679
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,1,64,balanced,0.06234666705131531
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,1,64,balanced,0.06201066573460897
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,1,64,balanced,0.0633546660343806
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,1,64,balanced,0.06362133224805196
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,1,64,balanced,0.06417599817117055
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,1,64,balanced,0.06427200138568878
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,1,64,balanced,0.06533333162466685
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,1,64,balanced,0.06634133557478587
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,1,64,balanced,0.06651199857393901
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,1,64,balanced,0.06900799771149953
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,1,64,balanced,0.07195200026035309
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,1,64,balanced,0.070783997575442
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,1,64,balanced,0.07644799848397572
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,1,64,balanced,0.08098133405049641
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,1,64,balanced,0.08798933029174805
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,1,64,balanced,0.09498666723569234
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,1,64,balanced,0.10853866736094157
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,1,64,balanced,0.12114133437474568
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,1,64,balanced,0.15470932920773825
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,1,64,balanced,0.17673067251841226
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,1,64,balanced,0.2422986626625061
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,1,64,balanced,0.2898293336232503
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,4,1,power_law_1.01,0.10615040063858032
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,4,1,power_law_1.01,0.1115839958190918
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,4,1,power_law_1.01,0.1474303960800171
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,4,1,power_law_1.01,0.165011203289032
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,4,1,power_law_1.01,0.18339840173721314
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,4,1,power_law_1.01,0.20206079483032227
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,4,1,power_law_1.01,0.2369663953781128
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,4,1,power_law_1.01,0.241811203956604
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,4,1,power_law_1.01,0.24890880584716796
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,4,1,power_law_1.01,0.25382399559020996
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,4,1,power_law_1.01,0.26396799087524414
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,4,1,power_law_1.01,0.2678911924362183
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,4,1,power_law_1.01,0.27584640979766845
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,4,1,power_law_1.01,0.2836992025375366
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,4,1,power_law_1.01,0.28204801082611086
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,4,1,power_law_1.01,0.29208319187164306
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,4,1,power_law_1.01,0.3018687963485718
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,4,1,power_law_1.01,0.331494402885437
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,4,1,power_law_1.01,0.3626176118850708
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,4,1,power_law_1.01,0.4333504199981689
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,4,1,power_law_1.01,0.5113791942596435
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,4,1,power_law_1.01,0.6203904151916504
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,4,1,power_law_1.01,0.7537792205810547
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,4,4,power_law_1.2,0.01796479970216751
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,4,1,power_law_1.01,1.017356777191162
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,4,4,power_law_1.2,0.019539199769496918
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,4,4,power_law_1.2,0.021593600511550903
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,4,1,power_law_1.01,1.2836799621582031
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,4,4,power_law_1.2,0.025190401077270507
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,4,4,power_law_1.2,0.02733440101146698
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,4,1,power_law_1.01,1.8468544006347656
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,4,4,power_law_1.2,0.028697600960731505
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,4,4,power_law_1.2,0.03586559891700745
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,4,4,power_law_1.2,0.033855998516082765
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,4,4,power_law_1.2,0.03712640106678009
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,4,1,power_law_1.01,2.393356704711914
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,4,4,power_law_1.2,0.03726719915866852
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,4,4,power_law_1.2,0.03962239921092987
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,4,4,power_law_1.2,0.041382399201393125
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,4,4,power_law_1.2,0.04195840060710907
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,4,4,power_law_1.2,0.04332799911499023
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,4,4,power_law_1.2,0.05348479747772217
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,4,4,power_law_1.2,0.052288001775741576
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,4,4,power_law_1.2,0.0531711995601654
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,4,4,power_law_1.2,0.07655680179595947
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,4,4,power_law_1.2,0.08771839737892151
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,4,4,power_law_1.2,0.09227520227432251
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,4,4,power_law_1.2,0.09648000001907349
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,4,4,power_law_1.2,0.10981119871139526
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,4,4,power_law_1.2,0.12040319442749023
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,4,4,power_law_1.2,0.15012480020523072
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,4,4,power_law_1.2,0.18336000442504882
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,4,4,power_law_1.2,0.24183039665222167
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,4,4,power_law_1.2,0.2880511999130249
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,8,8,power_law_1.01,0.04081279933452606
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,8,8,power_law_1.01,0.04385280013084412
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,8,8,power_law_1.01,0.042105600237846375
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,8,8,power_law_1.01,0.04805760085582733
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,8,8,power_law_1.01,0.049702399969100954
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,8,8,power_law_1.01,0.05080320239067078
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,8,8,power_law_1.01,0.050732797384262084
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,8,8,power_law_1.01,0.05055999755859375
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,8,8,power_law_1.01,0.05139200091361999
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,8,8,power_law_1.01,0.05114240050315857
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,8,8,power_law_1.01,0.05326719880104065
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,8,8,power_law_1.01,0.05609599947929382
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,8,8,power_law_1.01,0.05603839755058289
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,8,8,power_law_1.01,0.05990399718284607
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,8,8,power_law_1.01,0.06675840020179749
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,8,8,power_law_1.01,0.07049599885940552
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,8,8,power_law_1.01,0.07103360295295716
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,8,8,power_law_1.01,0.08080639839172363
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,8,8,power_law_1.01,0.08753920197486878
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,8,8,power_law_1.01,0.11058559417724609
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,8,8,power_law_1.01,0.12768640518188476
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,8,8,power_law_1.01,0.16421120166778563
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,8,8,power_law_1.01,0.2008064031600952
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,8,8,power_law_1.01,0.2696063995361328
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,8,8,power_law_1.01,0.351910400390625
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,8,8,power_law_1.01,0.47583999633789065
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,8,8,power_law_1.01,0.6104703903198242
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,4,64,power_law_1.01,0.01724800020456314
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,4,64,power_law_1.01,0.017056000232696534
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,4,64,power_law_1.01,0.017606399953365326
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,4,64,power_law_1.01,0.018380799889564516
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,4,64,power_law_1.01,0.019417600333690645
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,4,64,power_law_1.01,0.02110079973936081
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,4,64,power_law_1.01,0.034969601035118106
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,4,64,power_law_1.01,0.03519999980926514
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,4,64,power_law_1.01,0.029017600417137145
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,4,64,power_law_1.01,0.029100799560546876
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,4,64,power_law_1.01,0.029183998703956604
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,4,64,power_law_1.01,0.02980479896068573
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,4,64,power_law_1.01,0.029216000437736513
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,4,64,power_law_1.01,0.02922239899635315
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,4,64,power_law_1.01,0.029747200012207032
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,4,64,power_law_1.01,0.030713599920272828
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,4,64,power_law_1.01,0.03278720080852508
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,4,64,power_law_1.01,0.034944000840187076
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,4,64,power_law_1.01,0.03586559891700745
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,4,64,power_law_1.01,0.041305598616600034
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,4,64,power_law_1.01,0.044019201397895814
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,4,64,power_law_1.01,0.0541055977344513
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,4,64,power_law_1.01,0.06895999908447266
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,4,64,power_law_1.01,0.09155840277671815
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,4,64,power_law_1.01,0.10727039575576783
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,4,64,power_law_1.01,0.14646400213241578
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,4,64,power_law_1.01,0.19366400241851806
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,8,16,power_law_1.2,0.01825280040502548
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,8,16,power_law_1.2,0.018515199422836304
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,8,16,power_law_1.2,0.01822720021009445
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,8,16,power_law_1.2,0.018163199722766876
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,8,16,power_law_1.2,0.01907840073108673
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,8,16,power_law_1.2,0.018745599687099455
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,8,16,power_law_1.2,0.021536000072956085
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,8,16,power_law_1.2,0.020787200331687926
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,8,16,power_law_1.2,0.026015999913215637
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,8,16,power_law_1.2,0.026284798979759216
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,8,16,power_law_1.2,0.02560639977455139
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,8,16,power_law_1.2,0.03873920142650604
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,8,16,power_law_1.2,0.04017280042171478
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,8,16,power_law_1.2,0.03863039910793305
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,8,16,power_law_1.2,0.03864319920539856
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,8,16,power_law_1.2,0.03929600119590759
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,8,16,power_law_1.2,0.039878401160240176
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,8,16,power_law_1.2,0.04010879993438721
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,8,16,power_law_1.2,0.04345600008964538
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,8,16,power_law_1.2,0.04721280038356781
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,8,16,power_law_1.2,0.050995200872421265
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,8,16,power_law_1.2,0.06680319905281067
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,8,16,power_law_1.2,0.08018559813499451
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,8,16,power_law_1.2,0.09631360173225403
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,8,16,power_law_1.2,0.12184319496154786
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,8,16,power_law_1.2,0.16325759887695312
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,8,16,power_law_1.2,0.2033535957336426
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,1,16,power_law_1.01,0.08181120157241821
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,1,16,power_law_1.01,0.1080896019935608
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,1,16,power_law_1.01,0.10084480047225952
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,1,16,power_law_1.01,0.1054144024848938
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,1,16,power_law_1.01,0.11144319772720337
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,1,16,power_law_1.01,0.09390079975128174
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,1,16,power_law_1.01,0.10122879743576049
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,1,16,power_law_1.01,0.10584959983825684
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,1,16,power_law_1.01,0.1041983962059021
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,1,16,power_law_1.01,0.10154240131378174
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,1,16,power_law_1.01,0.10081919431686401
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,1,16,power_law_1.01,0.10473599433898925
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,1,16,power_law_1.01,0.10512000322341919
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,1,16,power_law_1.01,0.10759680271148682
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,1,16,power_law_1.01,0.1146880030632019
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,1,16,power_law_1.01,0.11175680160522461
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,1,16,power_law_1.01,0.12054400444030762
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,1,16,power_law_1.01,0.14135040044784547
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,1,16,power_law_1.01,0.1401535987854004
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,1,16,power_law_1.01,0.15559040307998656
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,1,16,power_law_1.01,0.16992640495300293
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,1,16,power_law_1.01,0.2032320022583008
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,1,16,power_law_1.01,0.23274879455566405
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,1,16,power_law_1.01,0.2970880031585693
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,1,16,power_law_1.01,0.34847359657287597
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,1,16,power_law_1.01,0.4874368190765381
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,1,16,power_law_1.01,0.5634175777435303
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,1,4,power_law_1.01,0.02396160066127777
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,1,4,power_law_1.01,0.037190398573875426
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,1,4,power_law_1.01,0.043372800946235655
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,1,4,power_law_1.01,0.05480960011482239
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,1,4,power_law_1.01,0.06158080101013184
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,1,4,power_law_1.01,0.06855679750442505
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,1,4,power_law_1.01,0.08479999899864196
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,1,4,power_law_1.01,0.08268799781799316
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,1,4,power_law_1.01,0.08855680227279664
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,1,4,power_law_1.01,0.08634240031242371
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,1,4,power_law_1.01,0.08718079924583436
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,1,4,power_law_1.01,0.08962560296058655
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,1,4,power_law_1.01,0.08859519958496094
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,1,4,power_law_1.01,0.09312639832496643
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,1,4,power_law_1.01,0.12792320251464845
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,1,4,power_law_1.01,0.11778559684753417
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,1,4,power_law_1.01,0.11352959871292115
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,1,4,power_law_1.01,0.18298239707946778
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,1,4,power_law_1.01,0.19939839839935303
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,1,4,power_law_1.01,0.18906240463256835
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,1,4,power_law_1.01,0.189900803565979
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,1,4,power_law_1.01,0.1903488039970398
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,1,4,power_law_1.01,0.21224958896636964
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,1,4,power_law_1.01,0.26679039001464844
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,1,4,power_law_1.01,0.30543999671936034
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,1,4,power_law_1.01,0.43802242279052733
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,1,4,power_law_1.01,0.5384768009185791
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,8,4,power_law_1.01,0.04538240134716034
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,8,4,power_law_1.01,0.04632959961891174
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,8,4,power_law_1.01,0.05002880096435547
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,8,4,power_law_1.01,0.05817599892616272
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,8,4,power_law_1.01,0.06792320013046264
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,8,4,power_law_1.01,0.07123839855194092
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,8,4,power_law_1.01,0.07949439883232116
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,8,4,power_law_1.01,0.07363839745521546
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,8,4,power_law_1.01,0.07487360239028931
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,8,4,power_law_1.01,0.07831040024757385
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,8,4,power_law_1.01,0.08202239871025085
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,8,4,power_law_1.01,0.08419839739799499
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,8,4,power_law_1.01,0.08610560297966004
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,8,4,power_law_1.01,0.09089279770851136
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,8,4,power_law_1.01,0.09556480050086975
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,8,4,power_law_1.01,0.10029439926147461
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,8,4,power_law_1.01,0.10158079862594604
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,8,4,power_law_1.01,0.11396479606628418
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,8,4,power_law_1.01,0.12893439531326295
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,8,4,power_law_1.01,0.15224319696426392
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,8,4,power_law_1.01,0.15825920104980468
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,8,4,power_law_1.01,0.1942911982536316
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,8,4,power_law_1.01,0.2382080078125
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,8,4,power_law_1.01,0.30696959495544435
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,8,4,power_law_1.01,0.39984641075134275
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,8,4,power_law_1.01,0.5603839874267578
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,8,4,power_law_1.01,0.6774144172668457
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,1,8,balanced,0.04038933416207632
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,1,8,balanced,0.04353600243727366
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,1,8,balanced,0.04374399781227112
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,1,8,balanced,0.05938666562239329
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,1,8,balanced,0.08119999865690868
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,1,8,balanced,0.11168000102043152
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,1,8,balanced,0.1139413317044576
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,1,8,balanced,0.11399466792742412
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,1,8,balanced,0.11546132961908977
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,1,8,balanced,0.11340266466140747
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,1,8,balanced,0.1165173351764679
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,1,8,balanced,0.11749866604804993
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,1,8,balanced,0.11760532855987549
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,1,8,balanced,0.12105600039164226
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,1,8,balanced,0.1256586710611979
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,1,8,balanced,0.12717333436012268
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,1,8,balanced,0.13120533029238382
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,1,8,balanced,0.13874666889508566
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,1,8,balanced,0.1444480021794637
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,1,8,balanced,0.16266133387883505
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,1,8,balanced,0.18000533183415732
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,1,8,balanced,0.21291200319925943
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,1,8,balanced,0.2405866583188375
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,1,8,balanced,0.3142133355140686
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,1,8,balanced,0.3784053325653076
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,1,8,balanced,0.5375839869181315
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,1,8,balanced,0.645466685295105
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,8,4,balanced,0.04177066683769226
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,8,4,balanced,0.04357333481311798
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,8,4,balanced,0.04384533564249674
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,8,4,balanced,0.04824000100294749
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,8,4,balanced,0.05454400181770325
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,8,4,balanced,0.07247999807198842
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,8,4,balanced,0.07449600100517273
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,8,4,balanced,0.07472000022729237
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,8,4,balanced,0.07612800101439159
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,8,4,balanced,0.07617599765459697
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,8,4,balanced,0.07509333391984303
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,8,4,balanced,0.0765066643555959
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,8,4,balanced,0.07825600107510884
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,8,4,balanced,0.08303466439247131
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,8,4,balanced,0.08669333656628926
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,8,4,balanced,0.0886346697807312
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,8,4,balanced,0.09430932998657227
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,8,4,balanced,0.10430933038393657
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,8,4,balanced,0.10993066430091858
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,8,4,balanced,0.13132266203562418
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,8,4,balanced,0.14839466412862143
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,8,4,balanced,0.19405867656071982
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,8,4,balanced,0.22900799910227457
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,8,4,balanced,0.3136799931526184
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,8,4,balanced,0.39340798060099286
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,8,4,balanced,0.5744906663894653
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,8,4,balanced,0.7382506529490153
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,32,2,balanced,0.04064533362785975
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,32,2,balanced,0.04165333261092504
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,32,2,balanced,0.04372266431649526
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,32,2,balanced,0.04799999793370565
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,32,2,balanced,0.05874133110046387
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,32,2,balanced,0.08076266447703044
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,32,2,balanced,0.08070933322111766
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,32,2,balanced,0.08196266492207845
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,32,2,balanced,0.08402666449546814
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,32,2,balanced,0.08295999964078267
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,32,2,balanced,0.08477866649627686
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,32,2,balanced,0.08773333827654521
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,32,2,balanced,0.08805867036183675
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,32,2,balanced,0.08889066179593404
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,32,2,balanced,0.09598933657010396
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,32,2,balanced,0.09694400429725647
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,32,2,balanced,0.10155733426411946
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,32,2,balanced,0.11430399616559346
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,32,2,balanced,0.1270133356253306
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,32,2,balanced,0.14604266484578451
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,32,2,balanced,0.1755573352177938
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,32,2,balanced,0.21497066815694174
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,32,2,balanced,0.2671733299891154
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,32,2,balanced,0.38900800546010333
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,32,2,balanced,0.4651840130488078
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,32,2,balanced,0.6700106461842855
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,32,2,balanced,0.8579946358998617
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,1,1,balanced,0.05972800155480703
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,1,1,balanced,0.07797866563002269
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,1,1,balanced,0.10973866780598958
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,1,1,balanced,0.1745120088259379
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,1,1,balanced,0.29954665899276733
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,1,1,balanced,0.5449279944101969
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,1,1,balanced,0.5501013199488322
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,1,1,balanced,0.5466880003611246
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,1,1,balanced,0.5536640087763468
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,1,1,balanced,0.5541066726048788
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,1,1,balanced,0.5596266587575277
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,1,1,balanced,0.5654613176981608
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,1,1,balanced,0.569546659787496
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,1,1,balanced,0.5792800188064575
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,1,1,balanced,0.5872053305308024
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,1,1,balanced,0.5930879910786947
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,1,1,balanced,0.6074933211008707
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,1,1,balanced,0.6408480008443197
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,1,1,balanced,0.666154662768046
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,1,1,balanced,0.7326133251190186
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,1,1,balanced,0.7948213418324789
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,1,1,balanced,0.8933173020680746
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,1,1,balanced,1.0085759957631428
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,1,1,balanced,1.347098668416341
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,1,1,balanced,1.5053760210673015
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,1,1,balanced,2.1971413294474282
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,1,1,balanced,2.6560212771097818
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,8,16,balanced,0.045994664231936135
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,8,16,balanced,0.04596266647179922
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,8,16,balanced,0.0461760014295578
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,8,16,balanced,0.048101335763931274
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,8,16,balanced,0.05374933282534281
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,8,16,balanced,0.05440000196297964
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,8,16,balanced,0.055914665261904396
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,8,16,balanced,0.056143999099731445
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,8,16,balanced,0.056464001536369324
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,8,16,balanced,0.05625066657861074
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,8,16,balanced,0.058133333921432495
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,8,16,balanced,0.05805333455403646
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,8,16,balanced,0.058117335041364036
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,8,16,balanced,0.060266668597857155
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,8,16,balanced,0.06386666496594746
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,8,16,balanced,0.06414400041103363
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,8,16,balanced,0.06809600194295247
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,8,16,balanced,0.07230400045712788
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,8,16,balanced,0.07650133470694225
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,8,16,balanced,0.0869813362757365
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,8,16,balanced,0.09763733545939128
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,8,16,balanced,0.11384532848993938
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,8,16,balanced,0.13955199718475342
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,8,16,balanced,0.1916053295135498
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,8,16,balanced,0.2242506742477417
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,8,16,balanced,0.31174933910369873
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,8,16,balanced,0.39388267199198407
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,4,1,power_law_1.01,0.05079039931297302
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,4,1,power_law_1.01,0.061862397193908694
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,4,1,power_law_1.01,0.07553279995918274
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,4,1,power_law_1.01,0.1125823974609375
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,4,1,power_law_1.01,0.14224640130996705
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,4,1,power_law_1.01,0.17856639623641968
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,4,1,power_law_1.01,0.2313152074813843
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,4,1,power_law_1.01,0.2400576114654541
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,4,1,power_law_1.01,0.24983680248260498
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,4,1,power_law_1.01,0.2594304084777832
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,4,1,power_law_1.01,0.2648256063461304
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,4,1,power_law_1.01,0.2723392009735107
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,4,1,power_law_1.01,0.27520639896392823
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,4,1,power_law_1.01,0.285427188873291
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,4,1,power_law_1.01,0.2935551881790161
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,4,1,power_law_1.01,0.30696959495544435
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,4,1,power_law_1.01,0.31281280517578125
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,4,1,power_law_1.01,0.34990720748901366
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,4,1,power_law_1.01,0.36399359703063966
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,4,1,power_law_1.01,0.4267392158508301
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,4,1,power_law_1.01,0.47565441131591796
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,4,1,power_law_1.01,0.583129596710205
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,4,1,power_law_1.01,0.6839295864105225
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,4,1,power_law_1.01,0.8823424339294433
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,4,1,power_law_1.01,1.0825984001159668
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,4,1,power_law_1.01,1.4898240089416503
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,4,1,power_law_1.01,1.8770624160766602
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,1,32,power_law_1.2,0.06448000073432922
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,1,32,power_law_1.2,0.06238080263137817
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,1,32,power_law_1.2,0.061689597368240354
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,1,32,power_law_1.2,0.06656000018119812
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,1,32,power_law_1.2,0.06600959897041321
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,1,32,power_law_1.2,0.06325759887695312
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,1,32,power_law_1.2,0.06568319797515869
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,1,32,power_law_1.2,0.06605439782142639
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,1,32,power_law_1.2,0.06561920046806335
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,1,32,power_law_1.2,0.0649728000164032
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,1,32,power_law_1.2,0.06928640007972717
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,1,32,power_law_1.2,0.06983680129051209
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,1,32,power_law_1.2,0.07181439995765686
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,1,32,power_law_1.2,0.07157120108604431
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,1,32,power_law_1.2,0.07561600208282471
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,1,32,power_law_1.2,0.07681919932365418
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,1,32,power_law_1.2,0.08332800269126892
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,1,32,power_law_1.2,0.09449599981307984
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,1,32,power_law_1.2,0.09772160053253173
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,1,32,power_law_1.2,0.12632960081100464
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,1,32,power_law_1.2,0.142739200592041
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,1,32,power_law_1.2,0.19541120529174805
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,1,32,power_law_1.2,0.2382335901260376
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,1,32,power_law_1.2,0.323526406288147
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,1,32,power_law_1.2,0.44778242111206057
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,1,32,power_law_1.2,0.6619455814361572
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,1,32,power_law_1.2,0.9047679901123047
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,32,4,power_law_1.2,0.04469119906425476
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,32,4,power_law_1.2,0.046374401450157164
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,32,4,power_law_1.2,0.04387840032577515
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,32,4,power_law_1.2,0.04805760085582733
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,32,4,power_law_1.2,0.048537600040435794
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,32,4,power_law_1.2,0.051526397466659546
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,32,4,power_law_1.2,0.054079997539520266
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,32,4,power_law_1.2,0.05553280115127564
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,32,4,power_law_1.2,0.05580800175666809
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,32,4,power_law_1.2,0.056831997632980344
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,32,4,power_law_1.2,0.056704002618789676
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,32,4,power_law_1.2,0.05782399773597717
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,32,4,power_law_1.2,0.059539198875427246
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,32,4,power_law_1.2,0.06283519864082336
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,32,4,power_law_1.2,0.06726400256156921
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,32,4,power_law_1.2,0.06809599995613098
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,32,4,power_law_1.2,0.07643520236015319
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,32,4,power_law_1.2,0.09014400243759155
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,32,4,power_law_1.2,0.10947200059890747
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,32,4,power_law_1.2,0.13631999492645264
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,32,4,power_law_1.2,0.14890240430831908
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,32,4,power_law_1.2,0.18455040454864502
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,32,4,power_law_1.2,0.22652800083160402
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,32,4,power_law_1.2,0.32052481174468994
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,32,4,power_law_1.2,0.38611838817596433
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,32,4,power_law_1.2,0.5531648159027099
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,32,4,power_law_1.2,0.7185472011566162
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,32,8,power_law_1.01,0.044223999977111815
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,32,8,power_law_1.01,0.04547840058803558
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,32,8,power_law_1.01,0.04322560131549835
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,32,8,power_law_1.01,0.04408319890499115
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,32,8,power_law_1.01,0.04505600035190582
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,32,8,power_law_1.01,0.04652799963951111
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,32,8,power_law_1.01,0.046751999855041505
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,32,8,power_law_1.01,0.04842880070209503
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,32,8,power_law_1.01,0.04736000001430511
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,32,8,power_law_1.01,0.04772480130195618
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,32,8,power_law_1.01,0.04901120066642761
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,32,8,power_law_1.01,0.04973439872264862
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,32,8,power_law_1.01,0.04987519979476929
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,32,8,power_law_1.01,0.05088000297546387
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,32,8,power_law_1.01,0.05544319748878479
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,32,8,power_law_1.01,0.05628160238265991
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,32,8,power_law_1.01,0.0606656014919281
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,32,8,power_law_1.01,0.06752640008926392
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,32,8,power_law_1.01,0.07501440048217774
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,32,8,power_law_1.01,0.0871999979019165
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,32,8,power_law_1.01,0.10313600301742554
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,32,8,power_law_1.01,0.1325824022293091
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,32,8,power_law_1.01,0.16236159801483155
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,32,8,power_law_1.01,0.220633602142334
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,32,8,power_law_1.01,0.31436800956726074
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,32,8,power_law_1.01,0.4302015781402588
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,32,8,power_law_1.01,0.5483391761779786
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,2,128,power_law_1.01,0.03856000006198883
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,2,128,power_law_1.01,0.03930880129337311
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,2,128,power_law_1.01,0.03868800103664398
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,2,128,power_law_1.01,0.0425024002790451
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,2,128,power_law_1.01,0.04382719993591309
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,2,128,power_law_1.01,0.04470399916172028
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,2,128,power_law_1.01,0.04676479995250702
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,2,128,power_law_1.01,0.04684160053730011
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,2,128,power_law_1.01,0.04672000110149384
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,2,128,power_law_1.01,0.047219198942184445
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,2,128,power_law_1.01,0.04958719909191132
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,2,128,power_law_1.01,0.050860798358917235
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,2,128,power_law_1.01,0.05006080269813538
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,2,128,power_law_1.01,0.054016000032424925
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,2,128,power_law_1.01,0.058956801891326904
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,2,128,power_law_1.01,0.05731199979782105
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,2,128,power_law_1.01,0.06340479850769043
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,2,128,power_law_1.01,0.07054719924926758
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,2,128,power_law_1.01,0.07687039971351624
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,2,128,power_law_1.01,0.09217280149459839
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,2,128,power_law_1.01,0.10845439434051514
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,2,128,power_law_1.01,0.144268798828125
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,2,128,power_law_1.01,0.1711359977722168
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,2,128,power_law_1.01,0.2376960039138794
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,2,128,power_law_1.01,0.3001471996307373
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,2,128,power_law_1.01,0.4367487907409668
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,2,128,power_law_1.01,0.5865280151367187
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,4,32,power_law_1.01,0.018035200238227845
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,4,32,power_law_1.01,0.017849600315093993
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,4,32,power_law_1.01,0.017824000120162962
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,4,32,power_law_1.01,0.01807360053062439
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,4,32,power_law_1.01,0.01828480064868927
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,4,32,power_law_1.01,0.019308799505233766
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,4,32,power_law_1.01,0.023577600717544556
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,4,32,power_law_1.01,0.0233024001121521
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,4,32,power_law_1.01,0.036051198840141296
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,4,32,power_law_1.01,0.036396801471710205
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,4,32,power_law_1.01,0.036313599348068236
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,4,32,power_law_1.01,0.03563520014286041
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,4,32,power_law_1.01,0.03500159978866577
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,4,32,power_law_1.01,0.034431999921798705
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,4,32,power_law_1.01,0.03432320058345795
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,4,32,power_law_1.01,0.034483200311660765
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,4,32,power_law_1.01,0.03711360096931458
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,4,32,power_law_1.01,0.03849599957466125
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,4,32,power_law_1.01,0.040729600191116336
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,4,32,power_law_1.01,0.04599680006504059
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,4,32,power_law_1.01,0.04821119904518127
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,4,32,power_law_1.01,0.0596671998500824
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,4,32,power_law_1.01,0.07402240037918091
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,4,32,power_law_1.01,0.09191679954528809
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,4,32,power_law_1.01,0.1160256028175354
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,4,32,power_law_1.01,0.15351680517196656
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,4,32,power_law_1.01,0.2042367935180664
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,2,128,power_law_1.2,0.047891199588775635
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,2,128,power_law_1.2,0.046137601137161255
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,2,128,power_law_1.2,0.047839999198913574
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,2,128,power_law_1.2,0.04907520115375519
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,2,128,power_law_1.2,0.048895999789237976
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,2,128,power_law_1.2,0.0498879998922348
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,2,128,power_law_1.2,0.05079039931297302
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,2,128,power_law_1.2,0.0515392005443573
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,2,128,power_law_1.2,0.051577597856521606
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,2,128,power_law_1.2,0.05139200091361999
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,2,128,power_law_1.2,0.052134400606155394
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,2,128,power_law_1.2,0.05279359817504883
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,2,128,power_law_1.2,0.053472000360488894
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,2,128,power_law_1.2,0.05383679866790771
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,2,128,power_law_1.2,0.057651197910308837
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,2,128,power_law_1.2,0.05904639959335327
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,2,128,power_law_1.2,0.061273598670959474
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,2,128,power_law_1.2,0.06694399714469909
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,2,128,power_law_1.2,0.07279999852180481
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,2,128,power_law_1.2,0.08626559972763062
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,2,128,power_law_1.2,0.09561600089073181
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,2,128,power_law_1.2,0.12854399681091308
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,2,128,power_law_1.2,0.15594880580902098
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,2,128,power_law_1.2,0.23074560165405272
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,2,128,power_law_1.2,0.282207989692688
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,2,128,power_law_1.2,0.4167488098144531
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,2,128,power_law_1.2,0.5266816139221191
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,1,4,power_law_1.2,0.0567359983921051
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,1,4,power_law_1.2,0.06890239715576171
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,1,4,power_law_1.2,0.06732800006866455
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,1,4,power_law_1.2,0.08744320273399353
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,1,4,power_law_1.2,0.09342719912528992
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,1,4,power_law_1.2,0.10473599433898925
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,1,4,power_law_1.2,0.1250496029853821
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,1,4,power_law_1.2,0.12697600126266478
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,1,4,power_law_1.2,0.13544960021972657
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,1,4,power_law_1.2,0.12609280347824098
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,1,4,power_law_1.2,0.12510720491409302
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,1,4,power_law_1.2,0.13723520040512086
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,1,4,power_law_1.2,0.13157119750976562
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,1,4,power_law_1.2,0.14320640563964843
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,1,4,power_law_1.2,0.1502527952194214
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,1,4,power_law_1.2,0.15457279682159425
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,1,4,power_law_1.2,0.16602879762649536
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,1,4,power_law_1.2,0.17964160442352295
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,1,4,power_law_1.2,0.201363205909729
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,1,4,power_law_1.2,0.23099520206451415
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,1,4,power_law_1.2,0.2620928049087524
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,1,4,power_law_1.2,0.32593278884887694
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,1,4,power_law_1.2,0.3909375905990601
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,1,4,power_law_1.2,0.5078847885131836
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,1,4,power_law_1.2,0.6368063926696778
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,1,4,power_law_1.2,0.8892671585083007
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,1,4,power_law_1.2,1.1380864143371583
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,1,1,power_law_1.01,0.07698559761047363
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,1,1,power_law_1.01,0.11217279434204101
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,1,1,power_law_1.01,0.1663807988166809
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,1,1,power_law_1.01,0.2913216114044189
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,1,1,power_law_1.01,0.4083456039428711
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,1,1,power_law_1.01,0.5485951900482178
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,1,1,power_law_1.01,0.7421184062957764
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,1,1,power_law_1.01,0.7927680015563965
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,1,1,power_law_1.01,0.8048512458801269
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,1,1,power_law_1.01,0.8440256118774414
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,1,1,power_law_1.01,0.8608384132385254
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,1,1,power_law_1.01,0.8897024154663086
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,1,1,power_law_1.01,0.8926527976989747
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,1,1,power_law_1.01,0.8941823959350585
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,1,1,power_law_1.01,0.9506431579589844
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,1,1,power_law_1.01,0.9479040145874024
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,1,1,power_law_1.01,1.016761589050293
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,1,1,power_law_1.01,1.1014399528503418
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,1,1,power_law_1.01,1.0866687774658204
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,1,1,power_law_1.01,1.2158592224121094
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,1,1,power_law_1.01,1.2394944190979005
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,1,1,power_law_1.01,1.3987648010253906
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,1,1,power_law_1.01,1.5713472366333008
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,1,1,power_law_1.01,1.9089599609375
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,1,1,power_law_1.01,2.17128963470459
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,1,1,power_law_1.01,2.809337615966797
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,1,1,power_law_1.01,3.416774368286133
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,1,64,power_law_1.01,0.04410240054130554
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,1,64,power_law_1.01,0.04301440119743347
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,1,64,power_law_1.01,0.043110400438308716
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,1,64,power_law_1.01,0.04593920111656189
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,1,64,power_law_1.01,0.046700799465179445
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,1,64,power_law_1.01,0.04832000136375427
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,1,64,power_law_1.01,0.048716801404953006
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,1,64,power_law_1.01,0.04933120012283325
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,1,64,power_law_1.01,0.055238401889801024
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,1,64,power_law_1.01,0.05565440058708191
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,1,64,power_law_1.01,0.055641597509384154
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,1,64,power_law_1.01,0.054079997539520266
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,1,64,power_law_1.01,0.05555840134620667
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,1,64,power_law_1.01,0.060601598024368285
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,1,64,power_law_1.01,0.0659712016582489
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,1,64,power_law_1.01,0.06445440053939819
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,1,64,power_law_1.01,0.06952319741249084
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,1,64,power_law_1.01,0.0756608009338379
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,1,64,power_law_1.01,0.08414720296859741
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,1,64,power_law_1.01,0.09395840167999267
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,1,64,power_law_1.01,0.11183359622955322
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,1,64,power_law_1.01,0.15260159969329834
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,1,64,power_law_1.01,0.1781823992729187
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,1,64,power_law_1.01,0.24586238861083984
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,1,64,power_law_1.01,0.3005120038986206
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,1,64,power_law_1.01,0.4349376201629639
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,1,64,power_law_1.01,0.5814976215362548
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,4,2,power_law_1.01,0.07245439887046815
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,4,2,power_law_1.01,0.08355200290679932
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,4,2,power_law_1.01,0.09559680223464966
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,4,2,power_law_1.01,0.11046400070190429
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,4,2,power_law_1.01,0.12421760559082032
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,4,2,power_law_1.01,0.13810559511184692
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,4,2,power_law_1.01,0.15843199491500853
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,4,2,power_law_1.01,0.16031999588012696
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,4,2,power_law_1.01,0.16686079502105713
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,4,2,power_law_1.01,0.16883840560913085
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,4,2,power_law_1.01,0.17276159524917603
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,4,2,power_law_1.01,0.17205120325088502
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,4,2,power_law_1.01,0.17418880462646485
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,4,2,power_law_1.01,0.17730560302734374
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,4,2,power_law_1.01,0.184607994556427
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,4,2,power_law_1.01,0.18533120155334473
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,4,2,power_law_1.01,0.1950144052505493
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,4,2,power_law_1.01,0.21985280513763428
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,4,2,power_law_1.01,0.2375744104385376
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,4,2,power_law_1.01,0.27645440101623536
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,4,2,power_law_1.01,0.323142409324646
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,4,2,power_law_1.01,0.41225600242614746
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,4,2,power_law_1.01,0.4979519844055176
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,4,2,power_law_1.01,0.65763840675354
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,4,2,power_law_1.01,0.8389760017395019
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,4,2,power_law_1.01,1.1664575576782226
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,4,2,power_law_1.01,1.5381183624267578
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,16,2,power_law_1.01,0.04437119960784912
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,16,2,power_law_1.01,0.046137601137161255
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,16,2,power_law_1.01,0.04832639992237091
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,16,2,power_law_1.01,0.05455999970436096
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,16,2,power_law_1.01,0.06384000182151794
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,16,2,power_law_1.01,0.06762880086898804
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,16,2,power_law_1.01,0.0737280011177063
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,16,2,power_law_1.01,0.07530239820480347
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,16,2,power_law_1.01,0.07736319899559022
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,16,2,power_law_1.01,0.07638400197029113
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,16,2,power_law_1.01,0.08184319734573364
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,16,2,power_law_1.01,0.08346880078315735
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,16,2,power_law_1.01,0.08840960264205933
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,16,2,power_law_1.01,0.09409279823303222
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,16,2,power_law_1.01,0.10134400129318237
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,16,2,power_law_1.01,0.10646400451660157
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,16,2,power_law_1.01,0.10900479555130005
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,16,2,power_law_1.01,0.12599040269851686
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,16,2,power_law_1.01,0.14306559562683105
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,16,2,power_law_1.01,0.1749119997024536
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,16,2,power_law_1.01,0.20830080509185792
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,16,2,power_law_1.01,0.27475199699401853
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,16,2,power_law_1.01,0.33860480785369873
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,16,2,power_law_1.01,0.46225919723510744
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,16,2,power_law_1.01,0.594758415222168
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,16,2,power_law_1.01,0.8661055564880371
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,16,2,power_law_1.01,1.1172608375549316
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,2,16,power_law_1.2,0.02044160068035126
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,2,16,power_law_1.2,0.024563199281692503
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,2,16,power_law_1.2,0.023712000250816344
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,2,16,power_law_1.2,0.025119999051094057
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,2,16,power_law_1.2,0.024537600576877594
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,2,16,power_law_1.2,0.024352000653743745
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,2,16,power_law_1.2,0.027897599339485168
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,2,16,power_law_1.2,0.02908799946308136
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,2,16,power_law_1.2,0.037171199917793274
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,2,16,power_law_1.2,0.03489919900894165
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,2,16,power_law_1.2,0.03797119855880737
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,2,16,power_law_1.2,0.050387197732925416
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,2,16,power_law_1.2,0.05109120011329651
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,2,16,power_law_1.2,0.05073919892311096
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,2,16,power_law_1.2,0.05421440005302429
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,2,16,power_law_1.2,0.053350400924682614
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,2,16,power_law_1.2,0.05603839755058289
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,2,16,power_law_1.2,0.05692800283432007
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,2,16,power_law_1.2,0.057158398628234866
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,2,16,power_law_1.2,0.06115840077400207
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,2,16,power_law_1.2,0.06748160123825073
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,2,16,power_law_1.2,0.07661439776420594
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,2,16,power_law_1.2,0.09752320051193238
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,2,16,power_law_1.2,0.12109440565109253
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,2,16,power_law_1.2,0.14412800073623658
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,2,16,power_law_1.2,0.2065664052963257
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,2,16,power_law_1.2,0.2566783905029297
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,8,8,balanced,0.03584533433119456
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,8,8,balanced,0.03985599925120672
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,8,8,balanced,0.03772799919048945
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,8,8,balanced,0.042122667034467064
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,8,8,balanced,0.04797866443792979
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,8,8,balanced,0.05242133140563965
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,8,8,balanced,0.05420800050099691
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,8,8,balanced,0.05407466491063436
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,8,8,balanced,0.0543039987484614
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,8,8,balanced,0.05399466554323832
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,8,8,balanced,0.0562666654586792
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,8,8,balanced,0.05622933308283488
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,8,8,balanced,0.05575466652711233
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,8,8,balanced,0.0581226646900177
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,8,8,balanced,0.06234133243560791
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,8,8,balanced,0.06453333298365276
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,8,8,balanced,0.06854933500289917
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,8,8,balanced,0.07667199770609538
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,8,8,balanced,0.07866666714350383
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,8,8,balanced,0.09702400366465251
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,8,8,balanced,0.10957866907119751
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,8,8,balanced,0.14867732922236124
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,8,8,balanced,0.17674134174982706
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,8,8,balanced,0.23753066857655844
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,8,8,balanced,0.29708266258239746
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,8,8,balanced,0.4169119993845622
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,8,8,balanced,0.534005324045817
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,8,2,balanced,0.01937599976857503
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,8,2,balanced,0.01947733387351036
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,8,2,balanced,0.021242665747801464
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,8,2,balanced,0.023503998915354412
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,8,2,balanced,0.03053866575161616
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,8,2,balanced,0.04605866471926371
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,8,2,balanced,0.04754666487375895
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,8,2,balanced,0.048138668139775596
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,8,2,balanced,0.04974400003751119
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,8,2,balanced,0.05030400057633718
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,8,2,balanced,0.05226133267084757
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,8,2,balanced,0.05462933580080668
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,8,2,balanced,0.055573334296544395
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,8,2,balanced,0.05773333211739858
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,8,2,balanced,0.06485866506894429
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,8,2,balanced,0.06473599870999654
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,8,2,balanced,0.06617600222428639
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,8,2,balanced,0.09292800227801006
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,8,2,balanced,0.09130133191744487
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,8,2,balanced,0.12351999680201213
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,8,2,balanced,0.1323199967543284
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,8,2,balanced,0.16726400454839072
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,8,2,balanced,0.17913599809010824
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,8,2,balanced,0.1914400060971578
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,8,2,balanced,0.20485333601633707
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,8,2,balanced,0.3503626585006714
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,8,2,balanced,0.37404799461364746
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,8,2,power_law_1.01,0.04423039853572845
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,8,2,power_law_1.01,0.047635200619697574
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,8,2,power_law_1.01,0.05102720260620117
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,8,2,power_law_1.01,0.05959039926528931
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,8,2,power_law_1.01,0.06650239825248719
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,8,2,power_law_1.01,0.07743359804153442
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,8,2,power_law_1.01,0.08663679957389832
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,8,2,power_law_1.01,0.08515200018882751
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,8,2,power_law_1.01,0.09054719805717468
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,8,2,power_law_1.01,0.09408640265464782
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,8,2,power_law_1.01,0.0938431978225708
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,8,2,power_law_1.01,0.09864959716796876
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,8,2,power_law_1.01,0.10140800476074219
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,8,2,power_law_1.01,0.10457600355148315
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,8,2,power_law_1.01,0.11436799764633179
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,8,2,power_law_1.01,0.1214400053024292
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,8,2,power_law_1.01,0.1223680019378662
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,8,2,power_law_1.01,0.13941760063171388
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,8,2,power_law_1.01,0.1582335948944092
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,8,2,power_law_1.01,0.1945024013519287
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,8,2,power_law_1.01,0.2265023946762085
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,8,2,power_law_1.01,0.2903872013092041
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,8,2,power_law_1.01,0.3645503997802734
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,8,2,power_law_1.01,0.5049024105072022
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,8,2,power_law_1.01,0.6381311893463135
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,8,2,power_law_1.01,0.931935977935791
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,8,2,power_law_1.01,1.209817600250244
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,2,1,balanced,0.160863995552063
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,2,1,balanced,0.16569599509239197
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,2,1,balanced,0.1778986652692159
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,2,1,balanced,0.20737600326538086
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,2,1,balanced,0.27101866404215497
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,2,1,balanced,0.4174933433532715
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,2,1,balanced,0.42296000321706134
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,2,1,balanced,0.42395734786987305
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,2,1,balanced,0.4261600176493327
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,2,1,balanced,0.4278293450673421
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,2,1,balanced,0.4323893388112386
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,2,1,balanced,0.4412746826807658
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,2,1,balanced,0.44410133361816406
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,2,1,balanced,0.4522346655527751
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,2,1,balanced,0.44842131932576496
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,2,1,balanced,0.4553813139597575
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,2,32,power_law_1.2,0.022342400252819063
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,2,1,balanced,0.46540268262227374
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,2,1,balanced,0.4934240182240804
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,2,32,power_law_1.2,0.022617599368095397
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,2,1,balanced,0.5169066588083903
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,2,32,power_law_1.2,0.02096640020608902
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,2,1,balanced,0.5801973342895508
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,2,1,balanced,0.6432160139083862
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,2,1,balanced,0.7312373320261637
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,2,1,balanced,0.840218702952067
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,2,1,balanced,1.190351963043213
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,2,32,power_law_1.2,0.01932799965143204
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,2,1,balanced,1.416111946105957
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,2,32,power_law_1.2,0.02014079988002777
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,2,1,balanced,2.0213546752929688
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,2,32,power_law_1.2,0.020684799551963805
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,2,32,power_law_1.2,0.02162559926509857
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,2,32,power_law_1.2,0.023001599311828613
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,2,1,balanced,2.6661814053853354
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,2,32,power_law_1.2,0.023039999604225158
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,2,32,power_law_1.2,0.022912000119686127
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,2,32,power_law_1.2,0.02908799946308136
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,2,32,power_law_1.2,0.029702401161193846
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,2,32,power_law_1.2,0.030329599976539612
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,2,32,power_law_1.2,0.03953920006752014
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,2,32,power_law_1.2,0.03964160084724426
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,2,32,power_law_1.2,0.04032639861106872
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,2,32,power_law_1.2,0.044409599900245664
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,2,32,power_law_1.2,0.052223998308181765
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,2,32,power_law_1.2,0.056934398412704465
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,2,32,power_law_1.2,0.07023360133171082
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,2,32,power_law_1.2,0.05745919942855835
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,2,32,power_law_1.2,0.0694208025932312
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,2,32,power_law_1.2,0.08479999899864196
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,2,32,power_law_1.2,0.10941439867019653
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,2,32,power_law_1.2,0.1407871961593628
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,2,32,power_law_1.2,0.19561599493026732
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,2,32,power_law_1.2,0.2621056079864502
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,2,128,power_law_1.01,0.048102399706840514
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,2,128,power_law_1.01,0.04864639937877655
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,2,128,power_law_1.01,0.047443199157714847
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,2,128,power_law_1.01,0.05084159970283508
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,2,128,power_law_1.01,0.05026559829711914
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,2,128,power_law_1.01,0.05103359818458557
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,2,128,power_law_1.01,0.05149440169334411
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,2,128,power_law_1.01,0.051737600564956666
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,2,128,power_law_1.01,0.052339202165603636
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,2,128,power_law_1.01,0.051622402667999265
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,2,128,power_law_1.01,0.05286399722099304
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,2,128,power_law_1.01,0.054016000032424925
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,2,128,power_law_1.01,0.053887999057769774
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,2,128,power_law_1.01,0.05512319803237915
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,2,128,power_law_1.01,0.05889279842376709
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,2,128,power_law_1.01,0.05932160019874573
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,2,128,power_law_1.01,0.06054400205612183
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,2,128,power_law_1.01,0.06664959788322448
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,2,128,power_law_1.01,0.07242879867553711
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,2,128,power_law_1.01,0.08252800107002259
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,2,128,power_law_1.01,0.09241600036621093
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,2,128,power_law_1.01,0.11424000263214111
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,2,128,power_law_1.01,0.13040640354156494
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,2,128,power_law_1.01,0.17831679582595825
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,2,128,power_law_1.01,0.21905279159545898
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,2,128,power_law_1.01,0.30266239643096926
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,2,128,power_law_1.01,0.38668160438537597
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,2,128,power_law_1.2,0.04992640018463135
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,2,128,power_law_1.2,0.05027840137481689
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,2,128,power_law_1.2,0.05020800232887268
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,2,128,power_law_1.2,0.05235199928283692
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,2,128,power_law_1.2,0.05000960230827332
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,2,128,power_law_1.2,0.05050879716873169
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,2,128,power_law_1.2,0.05167359709739685
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,2,128,power_law_1.2,0.05297920107841492
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,2,128,power_law_1.2,0.05278080105781555
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,2,128,power_law_1.2,0.05275520086288452
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,2,128,power_law_1.2,0.05377280116081238
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,2,128,power_law_1.2,0.05354239940643311
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,2,128,power_law_1.2,0.054451197385787964
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,2,128,power_law_1.2,0.054451197385787964
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,2,128,power_law_1.2,0.05873280167579651
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,2,128,power_law_1.2,0.0584384024143219
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,2,128,power_law_1.2,0.06071680188179016
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,2,128,power_law_1.2,0.06574079990386963
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,2,128,power_law_1.2,0.07092480063438415
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,2,128,power_law_1.2,0.08370559811592101
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,2,128,power_law_1.2,0.09274240136146546
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,2,128,power_law_1.2,0.12814719676971437
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,2,128,power_law_1.2,0.13836159706115722
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,2,128,power_law_1.2,0.1943616032600403
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,2,128,power_law_1.2,0.2534656047821045
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,2,128,power_law_1.2,0.3514240026473999
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,2,128,power_law_1.2,0.4340672016143799
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,32,4,power_law_1.01,0.04340479969978332
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,32,4,power_law_1.01,0.04544639885425568
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,32,4,power_law_1.01,0.04373759925365448
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,32,4,power_law_1.01,0.04874880015850067
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,32,4,power_law_1.01,0.04903680086135864
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,32,4,power_law_1.01,0.05002239942550659
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,32,4,power_law_1.01,0.05432320237159729
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,32,4,power_law_1.01,0.05498239994049072
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,32,4,power_law_1.01,0.05432320237159729
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,32,4,power_law_1.01,0.05559679865837097
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,32,4,power_law_1.01,0.05589759945869446
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,32,4,power_law_1.01,0.058790397644042966
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,32,4,power_law_1.01,0.058963197469711306
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,32,4,power_law_1.01,0.0604095995426178
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,32,4,power_law_1.01,0.06724479794502258
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,32,4,power_law_1.01,0.0694208025932312
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,32,4,power_law_1.01,0.07283200025558471
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,4,1,balanced,0.08674666285514832
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,32,4,power_law_1.01,0.08673920035362244
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,4,1,balanced,0.09690666198730469
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,4,1,balanced,0.1135093371073405
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,4,1,balanced,0.15416533748308817
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,32,4,power_law_1.01,0.1053056001663208
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,4,1,balanced,0.2156053384145101
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,4,1,balanced,0.3276053269704183
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,32,4,power_law_1.01,0.1268928050994873
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,32,4,power_law_1.01,0.14699519872665406
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,32,4,power_law_1.01,0.18200960159301757
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,32,4,power_law_1.01,0.22028160095214844
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,32,4,power_law_1.01,0.2996608018875122
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,32,4,power_law_1.01,0.3788543939590454
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,4,1,balanced,0.3171573281288147
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,4,1,balanced,0.3145280083020528
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,4,1,balanced,0.31092266241709393
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,32,4,power_law_1.01,0.5265088081359863
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,4,1,balanced,0.3070720036824544
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,4,1,balanced,0.30720533927281696
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,4,1,balanced,0.3053599993387858
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,32,4,power_law_1.01,0.6959936141967773
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,4,1,balanced,0.31115732590357464
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,4,1,balanced,0.32197866837183636
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,4,1,balanced,0.3203893303871155
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,4,1,balanced,0.3285226623217265
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,4,1,balanced,0.3473759889602661
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,4,1,balanced,0.37029866377512616
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,4,1,balanced,0.39931201934814453
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,4,1,balanced,0.45365333557128906
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,4,1,balanced,0.5157333215077718
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,4,1,balanced,0.6275573174158732
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,4,1,balanced,0.7458879947662354
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,4,1,balanced,1.1432267030080159
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,4,1,balanced,1.3807360331217449
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,4,1,balanced,2.011850674947103
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,4,1,balanced,2.6100692749023438
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,1,2,power_law_1.01,0.024684800207614897
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,1,2,power_law_1.01,0.036025598645210266
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,1,2,power_law_1.01,0.05014399886131286
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,1,2,power_law_1.01,0.06481279730796814
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,1,2,power_law_1.01,0.08435840010643006
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,1,2,power_law_1.01,0.10535680055618286
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,1,2,power_law_1.01,0.13054720163345337
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,1,2,power_law_1.01,0.1348736047744751
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,1,2,power_law_1.01,0.1398911952972412
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,1,2,power_law_1.01,0.13682559728622437
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,1,2,power_law_1.01,0.14149760007858275
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,1,2,power_law_1.01,0.14410879611968994
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,1,2,power_law_1.01,0.14878079891204835
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,1,2,power_law_1.01,0.149619197845459
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,1,2,power_law_1.01,0.16053760051727295
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,1,2,power_law_1.01,0.1652799963951111
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,1,2,power_law_1.01,0.18067840337753296
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,1,2,power_law_1.01,0.20874240398406982
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,1,2,power_law_1.01,0.19573760032653809
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,1,2,power_law_1.01,0.2508863925933838
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,1,2,power_law_1.01,0.2364032030105591
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,1,2,power_law_1.01,0.2941567897796631
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,1,2,power_law_1.01,0.3394432067871094
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,1,2,power_law_1.01,0.3895872116088867
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,1,2,power_law_1.01,0.46124801635742185
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,1,2,power_law_1.01,0.6276095867156982
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,1,2,power_law_1.01,0.7931583881378174
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,4,8,power_law_1.2,0.05281280279159546
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,4,8,power_law_1.2,0.05953279733657837
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,4,8,power_law_1.2,0.0637440025806427
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,4,8,power_law_1.2,0.07294719815254211
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,4,8,power_law_1.2,0.07633919715881347
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,4,8,power_law_1.2,0.07537919878959656
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,4,8,power_law_1.2,0.07881600260734559
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,4,8,power_law_1.2,0.08227199912071229
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,4,8,power_law_1.2,0.0803264021873474
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,4,8,power_law_1.2,0.07983360290527344
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,4,8,power_law_1.2,0.07987200021743775
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,4,8,power_law_1.2,0.08282880187034607
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,4,8,power_law_1.2,0.08300799727439881
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,4,8,power_law_1.2,0.08661119937896729
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,4,8,power_law_1.2,0.08929280042648316
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,4,8,power_law_1.2,0.09062399864196777
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,4,8,power_law_1.2,0.09511680006980897
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,4,8,power_law_1.2,0.1077888011932373
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,4,8,power_law_1.2,0.11726080179214478
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,4,8,power_law_1.2,0.14505599737167357
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,4,8,power_law_1.2,0.16088320016860963
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,4,8,power_law_1.2,0.1982143998146057
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,4,8,power_law_1.2,0.24542720317840577
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,4,8,power_law_1.2,0.35114240646362305
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,4,8,power_law_1.2,0.4608128070831299
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,4,8,power_law_1.2,0.6756608009338378
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,4,8,power_law_1.2,0.9778816223144531
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,8,4,power_law_1.01,0.04296320080757141
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,8,4,power_law_1.01,0.046009600162506104
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,8,4,power_law_1.01,0.04778240025043488
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,8,4,power_law_1.01,0.05504639744758606
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,8,4,power_law_1.01,0.05622400045394897
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,8,4,power_law_1.01,0.058841598033905027
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,8,4,power_law_1.01,0.0654911994934082
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,8,4,power_law_1.01,0.06373760104179382
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,8,4,power_law_1.01,0.06435199975967407
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,8,4,power_law_1.01,0.06421120166778564
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,8,4,power_law_1.01,0.07015039920806884
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,8,4,power_law_1.01,0.07249280214309692
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,8,4,power_law_1.01,0.07475200295448303
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,8,4,power_law_1.01,0.07543039917945862
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,8,4,power_law_1.01,0.08344320058822632
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,8,4,power_law_1.01,0.08449919819831848
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,8,4,power_law_1.01,0.09247360229492188
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,8,4,power_law_1.01,0.1057919979095459
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,8,4,power_law_1.01,0.11904000043869019
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,8,4,power_law_1.01,0.1447551965713501
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,8,4,power_law_1.01,0.16636799573898314
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,8,4,power_law_1.01,0.2076927900314331
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,8,4,power_law_1.01,0.26162559986114503
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,8,4,power_law_1.01,0.3617664098739624
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,8,4,power_law_1.01,0.4583104133605957
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,8,4,power_law_1.01,0.6547200202941894
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,8,4,power_law_1.01,0.8685888290405274
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,2,2,power_law_1.2,0.0573248028755188
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,2,2,power_law_1.2,0.07735679745674133
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,2,2,power_law_1.2,0.08755840063095092
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,2,2,power_law_1.2,0.12264959812164307
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,2,2,power_law_1.2,0.1443711996078491
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,2,2,power_law_1.2,0.1667199969291687
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,2,2,power_law_1.2,0.23771519660949708
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,2,2,power_law_1.2,0.23955199718475342
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,2,2,power_law_1.2,0.24227840900421144
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,2,2,power_law_1.2,0.24762239456176757
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,2,2,power_law_1.2,0.25264639854431153
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,2,2,power_law_1.2,0.25807359218597414
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,2,2,power_law_1.2,0.2683135986328125
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,2,2,power_law_1.2,0.2888704061508179
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,2,2,power_law_1.2,0.2896384000778198
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,2,2,power_law_1.2,0.2885119915008545
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,2,2,power_law_1.2,0.3127487897872925
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,2,2,power_law_1.2,0.3448512077331543
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,2,2,power_law_1.2,0.3483263969421387
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,2,2,power_law_1.2,0.39604480266571046
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,2,2,power_law_1.2,0.42256641387939453
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,2,2,power_law_1.2,0.5147136211395263
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,2,2,power_law_1.2,0.5678271770477294
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,2,2,power_law_1.2,0.7201856136322021
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,2,2,power_law_1.2,0.8861696243286132
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,2,2,power_law_1.2,1.178656005859375
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,2,2,power_law_1.2,1.438316822052002
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,2,64,power_law_1.01,0.04293760061264038
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,2,64,power_law_1.01,0.041631999611854556
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,2,64,power_law_1.01,0.04167680144309997
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,2,64,power_law_1.01,0.04460160136222839
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,2,64,power_law_1.01,0.046009600162506104
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,2,64,power_law_1.01,0.046291199326515195
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,2,64,power_law_1.01,0.047116801142692566
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,2,64,power_law_1.01,0.04826880097389221
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,2,64,power_law_1.01,0.05103999972343445
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,2,64,power_law_1.01,0.05058559775352478
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,2,64,power_law_1.01,0.05101439952850342
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,2,64,power_law_1.01,0.051667201519012454
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,2,64,power_law_1.01,0.051999998092651364
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,2,64,power_law_1.01,0.055980801582336426
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,2,64,power_law_1.01,0.061343997716903687
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,2,64,power_law_1.01,0.05817599892616272
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,2,64,power_law_1.01,0.06263039708137512
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,2,64,power_law_1.01,0.06945279836654664
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,2,64,power_law_1.01,0.0764415979385376
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,2,64,power_law_1.01,0.09144319891929627
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,2,64,power_law_1.01,0.10712319612503052
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,2,64,power_law_1.01,0.1388416051864624
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,2,64,power_law_1.01,0.16666879653930664
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,2,64,power_law_1.01,0.21846399307250977
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,2,64,power_law_1.01,0.28645761013031007
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,2,64,power_law_1.01,0.4009984016418457
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,2,64,power_law_1.01,0.5281343936920166
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,1,4,power_law_1.2,0.06220800280570984
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,1,4,power_law_1.2,0.07992960214614868
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,1,4,power_law_1.2,0.08305919766426087
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,1,4,power_law_1.2,0.09416959881782531
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,1,4,power_law_1.2,0.11093120574951172
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,1,4,power_law_1.2,0.11931519508361817
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,1,4,power_law_1.2,0.16172800064086915
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,1,4,power_law_1.2,0.15492479801177977
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,1,4,power_law_1.2,0.15852160453796388
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,1,4,power_law_1.2,0.15190399885177613
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,1,4,power_law_1.2,0.16533119678497316
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,1,4,power_law_1.2,0.15639040470123292
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,1,4,power_law_1.2,0.17363200187683106
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,1,4,power_law_1.2,0.17095040082931517
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,1,4,power_law_1.2,0.18316160440444945
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,1,4,power_law_1.2,0.18772480487823487
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,1,4,power_law_1.2,0.20223360061645507
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,1,4,power_law_1.2,0.22250878810882568
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,1,4,power_law_1.2,0.23663361072540284
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,1,4,power_law_1.2,0.27714560031890867
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,1,4,power_law_1.2,0.3051136016845703
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,1,4,power_law_1.2,0.38462080955505373
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,1,4,power_law_1.2,0.4343616008758545
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,1,4,power_law_1.2,0.5823935985565185
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,1,4,power_law_1.2,0.6928127765655517
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,1,4,power_law_1.2,0.9691583633422851
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,1,4,power_law_1.2,1.3191871643066406
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,16,8,power_law_1.2,0.041555199027061465
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,16,8,power_law_1.2,0.04389120042324066
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,16,8,power_law_1.2,0.043289598822593686
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,16,8,power_law_1.2,0.04702720046043396
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,16,8,power_law_1.2,0.04966399967670441
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,16,8,power_law_1.2,0.0497408002614975
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,16,8,power_law_1.2,0.05102720260620117
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,16,8,power_law_1.2,0.051283198595047
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,16,8,power_law_1.2,0.05074560046195984
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,16,8,power_law_1.2,0.05172479748725891
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,16,8,power_law_1.2,0.05343359708786011
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,16,8,power_law_1.2,0.05436800122261047
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,16,8,power_law_1.2,0.056857597827911374
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,16,8,power_law_1.2,0.05883520245552063
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,16,8,power_law_1.2,0.0644927978515625
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,16,8,power_law_1.2,0.06755200028419495
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,16,8,power_law_1.2,0.06944000124931335
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,16,8,power_law_1.2,0.07866880297660828
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,16,8,power_law_1.2,0.08676480054855347
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,16,8,power_law_1.2,0.10394879579544067
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,16,8,power_law_1.2,0.1179967999458313
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,16,8,power_law_1.2,0.1659327983856201
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,16,8,power_law_1.2,0.2065216064453125
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,16,8,power_law_1.2,0.2844608068466187
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,16,8,power_law_1.2,0.37267839908599854
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,16,8,power_law_1.2,0.5214399814605712
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,16,8,power_law_1.2,0.6774591922760009
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,16,1,balanced,0.04930666585763296
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,16,1,balanced,0.05190399785836538
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,16,1,balanced,0.053786665201187134
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,16,1,balanced,0.06862399975458781
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,16,1,balanced,0.09966933727264404
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,16,1,balanced,0.1400159994761149
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,16,1,balanced,0.13958932956059775
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,16,1,balanced,0.14124799768129984
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,16,1,balanced,0.1432213286558787
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,16,1,balanced,0.14333867033322653
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,16,1,balanced,0.14575999975204468
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,16,1,balanced,0.1514026621977488
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,16,1,balanced,0.1535040040810903
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,16,1,balanced,0.15525333086649576
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,16,1,balanced,0.16074666380882263
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,16,1,balanced,0.16334399580955505
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,16,1,balanced,0.17710399627685547
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,16,1,balanced,0.1975839932759603
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,16,1,balanced,0.21796266237894693
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,16,1,balanced,0.25366934140523273
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,16,1,balanced,0.2855520049730937
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,16,1,balanced,0.40556800365448
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,16,1,balanced,0.46247466405232746
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,16,1,balanced,0.6659040053685507
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,16,1,balanced,0.850810686747233
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,16,1,balanced,1.23909330368042
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,16,1,balanced,1.603493372599284
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,1,4,balanced,0.04576533536116282
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,1,4,balanced,0.045653333266576133
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,1,4,balanced,0.05614933371543884
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,1,4,balanced,0.07711466650168101
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,1,4,balanced,0.10973866780598958
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,1,4,balanced,0.17221866051355997
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,1,4,balanced,0.17536000410715738
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,1,4,balanced,0.17345066865285239
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,1,4,balanced,0.17485866943995157
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,1,4,balanced,0.17612799008687338
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,1,4,balanced,0.1758506695429484
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,1,4,balanced,0.17965332667032877
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,1,4,balanced,0.1808799902598063
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,1,4,balanced,0.18300267060597739
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,1,4,balanced,0.19033066431681314
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,1,4,balanced,0.19321600596110025
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,1,4,balanced,0.1975839932759603
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,1,4,balanced,0.2118026614189148
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,1,4,balanced,0.2206933299700419
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,1,4,balanced,0.24272000789642334
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,1,4,balanced,0.2715573310852051
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,1,4,balanced,0.31276800235112506
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,1,4,balanced,0.3563520113627116
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,1,4,balanced,0.4715199867884318
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,1,4,balanced,0.5437279939651489
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,1,4,balanced,0.7704693476359049
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,1,4,balanced,0.9312853018442789
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,2,32,power_law_1.01,0.048102399706840514
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,2,32,power_law_1.01,0.046623998880386354
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,2,32,power_law_1.01,0.04565759897232056
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,2,32,power_law_1.01,0.04843519926071167
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,2,32,power_law_1.01,0.04966399967670441
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,2,32,power_law_1.01,0.050732797384262084
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,2,32,power_law_1.01,0.05115519762039185
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,2,32,power_law_1.01,0.05187839865684509
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,2,32,power_law_1.01,0.05249279737472534
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,2,32,power_law_1.01,0.05194240212440491
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,2,32,power_law_1.01,0.053260797262191774
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,2,32,power_law_1.01,0.0579584002494812
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,2,32,power_law_1.01,0.05751039981842041
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,2,32,power_law_1.01,0.058374398946762086
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,2,32,power_law_1.01,0.0617792010307312
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,2,32,power_law_1.01,0.0621504008769989
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,2,32,power_law_1.01,0.06754559874534607
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,2,32,power_law_1.01,0.07918720245361328
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,2,32,power_law_1.01,0.0836736023426056
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,2,32,power_law_1.01,0.1029312014579773
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,2,32,power_law_1.01,0.11976959705352783
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,2,32,power_law_1.01,0.14653439521789552
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,2,32,power_law_1.01,0.18743040561676025
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,2,32,power_law_1.01,0.23978240489959718
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,2,32,power_law_1.01,0.30806400775909426
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,2,32,power_law_1.01,0.43091840744018556
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,2,32,power_law_1.01,0.586732816696167
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,1,32,power_law_1.01,0.024806399643421174
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,1,32,power_law_1.01,0.024102400243282317
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,1,32,power_law_1.01,0.023155200481414794
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,1,32,power_law_1.01,0.02385919988155365
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,1,32,power_law_1.01,0.024876800179481507
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,1,32,power_law_1.01,0.02927359938621521
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,1,32,power_law_1.01,0.03736959993839264
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,1,32,power_law_1.01,0.037574398517608645
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,1,32,power_law_1.01,0.051097601652145386
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,1,32,power_law_1.01,0.052198398113250735
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,1,32,power_law_1.01,0.05258240103721619
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,1,32,power_law_1.01,0.056953597068786624
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,1,32,power_law_1.01,0.053427201509475705
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,1,32,power_law_1.01,0.05588480234146118
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,1,32,power_law_1.01,0.055155199766159055
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,1,32,power_law_1.01,0.05240960121154785
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,1,32,power_law_1.01,0.054771202802658084
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,1,32,power_law_1.01,0.05559679865837097
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,1,32,power_law_1.01,0.0531391978263855
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,1,32,power_law_1.01,0.06414719820022582
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,1,32,power_law_1.01,0.0716159999370575
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,1,32,power_law_1.01,0.08553599715232849
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,1,32,power_law_1.01,0.10392320156097412
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,1,32,power_law_1.01,0.1298367977142334
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,1,32,power_law_1.01,0.1695296049118042
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,1,32,power_law_1.01,0.25338239669799806
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,1,32,power_law_1.01,0.29757440090179443
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,4,2,power_law_1.2,0.019020800292491914
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,2,32,power_law_1.01,0.060185599327087405
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,4,2,power_law_1.2,0.02056960016489029
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,2,32,power_law_1.01,0.05971840023994446
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,4,2,power_law_1.2,0.023923200368881226
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,4,2,power_law_1.2,0.02974080145359039
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,2,32,power_law_1.01,0.05854079723358154
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,4,2,power_law_1.2,0.03668479919433594
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,2,32,power_law_1.01,0.06004480123519897
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,4,2,power_law_1.2,0.042342400550842284
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,2,32,power_law_1.01,0.06116480231285095
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,4,2,power_law_1.2,0.05070719718933105
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,2,32,power_law_1.01,0.0573248028755188
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,4,2,power_law_1.2,0.052288001775741576
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,2,32,power_law_1.01,0.06074240207672119
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,4,2,power_law_1.2,0.05307520031929016
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,2,32,power_law_1.01,0.05889279842376709
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,4,2,power_law_1.2,0.0546239972114563
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,2,32,power_law_1.01,0.05987840294837952
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,4,2,power_law_1.2,0.05624319911003113
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,2,32,power_law_1.01,0.06149759888648987
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,4,2,power_law_1.2,0.05767679810523987
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,4,2,power_law_1.2,0.05941759943962097
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,4,2,power_law_1.2,0.06054400205612183
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,4,2,power_law_1.2,0.06232960224151611
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,4,2,power_law_1.2,0.06413440108299255
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,4,2,power_law_1.2,0.0796288013458252
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,2,32,power_law_1.01,0.061478400230407716
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,4,2,power_law_1.2,0.08554880023002624
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,2,32,power_law_1.01,0.06311039924621582
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,4,2,power_law_1.2,0.09147520065307617
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,2,32,power_law_1.01,0.06236799955368042
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,4,2,power_law_1.2,0.11329280138015747
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,2,32,power_law_1.01,0.06465920209884643
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,4,2,power_law_1.2,0.10769280195236205
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,2,32,power_law_1.01,0.06974080204963684
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,4,2,power_law_1.2,0.1342463970184326
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,2,32,power_law_1.01,0.07095040082931518
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,4,2,power_law_1.2,0.16715519428253173
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,2,32,power_law_1.01,0.07393280267715455
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,4,2,power_law_1.2,0.1973312020301819
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,2,32,power_law_1.01,0.07985919713973999
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,4,2,power_law_1.2,0.24599039554595947
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,2,32,power_law_1.01,0.08472959995269776
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,4,2,power_law_1.2,0.3127295970916748
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,2,32,power_law_1.01,0.09752960205078125
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,2,32,power_law_1.01,0.11166080236434936
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,4,2,power_law_1.2,0.3842751979827881
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,2,32,power_law_1.01,0.141702401638031
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,2,32,power_law_1.01,0.1637887954711914
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,2,32,power_law_1.01,0.20916481018066407
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,2,32,power_law_1.01,0.26282238960266113
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,2,32,power_law_1.01,0.36827518939971926
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,2,32,power_law_1.01,0.4889088153839111
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,4,64,power_law_1.2,0.04332799911499023
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,4,64,power_law_1.2,0.039059200882911684
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,4,64,power_law_1.2,0.039238399267196654
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,4,64,power_law_1.2,0.04356479942798615
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,4,64,power_law_1.2,0.040345600247383116
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,4,64,power_law_1.2,0.04132480025291443
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,4,64,power_law_1.2,0.042828801274299624
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,4,64,power_law_1.2,0.04325760006904602
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,4,64,power_law_1.2,0.04441600143909454
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,4,64,power_law_1.2,0.04397439956665039
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,4,64,power_law_1.2,0.044819200038909913
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,4,64,power_law_1.2,0.045414400100708005
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,4,64,power_law_1.2,0.046854400634765626
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,4,64,power_law_1.2,0.0477183997631073
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,4,64,power_law_1.2,0.05192959904670715
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,4,64,power_law_1.2,0.05200639963150024
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,4,64,power_law_1.2,0.05563520193099976
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,4,64,power_law_1.2,0.061887997388839724
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,4,64,power_law_1.2,0.06741120219230652
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,4,64,power_law_1.2,0.08236160278320312
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,4,64,power_law_1.2,0.08910080194473266
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,4,64,power_law_1.2,0.1125823974609375
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,4,64,power_law_1.2,0.12692480087280272
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,4,64,power_law_1.2,0.1709439992904663
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,4,64,power_law_1.2,0.22960638999938965
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,4,64,power_law_1.2,0.3342528104782104
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,4,64,power_law_1.2,0.40113282203674316
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,4,2,power_law_1.01,0.04499199986457825
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,4,2,power_law_1.01,0.05124480128288269
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,4,2,power_law_1.01,0.05384320020675659
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,4,2,power_law_1.01,0.06955519914627076
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,4,2,power_law_1.01,0.08073599934577942
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,4,2,power_law_1.01,0.09234560132026673
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,4,2,power_law_1.01,0.10676480531692505
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,4,2,power_law_1.01,0.11017600297927857
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,4,2,power_law_1.01,0.11074559688568116
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,4,2,power_law_1.01,0.11484800577163697
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,4,2,power_law_1.01,0.11541759967803955
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,4,2,power_law_1.01,0.12441600561141967
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,4,2,power_law_1.01,0.12557439804077147
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,4,2,power_law_1.01,0.12859519720077514
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,4,2,power_law_1.01,0.1382848024368286
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,4,2,power_law_1.01,0.1400704026222229
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,4,2,power_law_1.01,0.15285120010375977
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,4,2,power_law_1.01,0.16987520456314087
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,4,2,power_law_1.01,0.19018239974975587
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,4,2,power_law_1.01,0.224288010597229
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,4,2,power_law_1.01,0.2643392086029053
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,4,2,power_law_1.01,0.3414975881576538
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,4,2,power_law_1.01,0.4092095851898193
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,4,2,power_law_1.01,0.5568319797515869
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,4,2,power_law_1.01,0.6833280086517334
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,4,2,power_law_1.01,1.0084416389465332
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,4,2,power_law_1.01,1.3249664306640625
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,1,256,power_law_1.01,0.017555199563503265
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,1,256,power_law_1.01,0.018137599527835845
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,1,256,power_law_1.01,0.01910399943590164
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,1,256,power_law_1.01,0.021036800742149354
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,1,256,power_law_1.01,0.03436799943447113
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,1,256,power_law_1.01,0.02988159954547882
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,1,256,power_law_1.01,0.0297791987657547
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,1,256,power_law_1.01,0.030035200715065002
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,1,256,power_law_1.01,0.02991360127925873
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,1,256,power_law_1.01,0.029811200499534608
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,1,256,power_law_1.01,0.030291199684143066
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,1,256,power_law_1.01,0.03047040104866028
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,1,256,power_law_1.01,0.02999039888381958
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,1,256,power_law_1.01,0.029580798745155335
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,1,256,power_law_1.01,0.03079040050506592
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,1,256,power_law_1.01,0.03269760012626648
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,1,256,power_law_1.01,0.033036801218986514
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,1,256,power_law_1.01,0.0353983998298645
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,1,256,power_law_1.01,0.03656319975852966
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,1,256,power_law_1.01,0.03938559889793396
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,1,256,power_law_1.01,0.04536960124969482
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,1,256,power_law_1.01,0.05792639851570129
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,1,256,power_law_1.01,0.07351679801940918
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,1,256,power_law_1.01,0.09618560075759888
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,1,256,power_law_1.01,0.12021759748458863
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,1,256,power_law_1.01,0.17137279510498046
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,1,256,power_law_1.01,0.21722240447998048
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,2,2,power_law_1.2,0.07637760043144226
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,2,2,power_law_1.2,0.08970239758491516
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,2,2,power_law_1.2,0.10027519464492798
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,2,2,power_law_1.2,0.1286463975906372
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,2,2,power_law_1.2,0.14263039827346802
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,2,2,power_law_1.2,0.1641088008880615
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,2,2,power_law_1.2,0.19912960529327392
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,2,2,power_law_1.2,0.20134398937225342
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,2,2,power_law_1.2,0.21685760021209716
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,2,2,power_law_1.2,0.2160383939743042
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,2,2,power_law_1.2,0.22012801170349122
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,2,2,power_law_1.2,0.21809279918670654
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,2,2,power_law_1.2,0.22883200645446777
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,2,2,power_law_1.2,0.237824010848999
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,2,2,power_law_1.2,0.23934080600738525
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,2,2,power_law_1.2,0.24019839763641357
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,2,2,power_law_1.2,0.24983680248260498
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,2,2,power_law_1.2,0.27885439395904543
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,2,2,power_law_1.2,0.301363205909729
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,2,2,power_law_1.2,0.34063360691070554
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,2,2,power_law_1.2,0.3889791965484619
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,2,2,power_law_1.2,0.4670015811920166
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,2,2,power_law_1.2,0.5600128173828125
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,2,2,power_law_1.2,0.7851327896118164
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,2,2,power_law_1.2,0.9279487609863282
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,2,2,power_law_1.2,1.2731648445129395
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,2,2,power_law_1.2,1.7256576538085937
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,2,16,power_law_1.01,0.020505599677562714
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,2,16,power_law_1.01,0.024447999894618988
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,2,16,power_law_1.01,0.024025599658489227
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,2,16,power_law_1.01,0.024249599874019624
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,2,16,power_law_1.01,0.024742400646209715
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,2,16,power_law_1.01,0.026041600108146667
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,2,16,power_law_1.01,0.02901119887828827
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,2,16,power_law_1.01,0.030054399371147157
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,2,16,power_law_1.01,0.03400959968566895
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,2,16,power_law_1.01,0.03550080060958862
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,2,16,power_law_1.01,0.03824639916419983
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,2,16,power_law_1.01,0.0500544011592865
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,2,16,power_law_1.01,0.05234559774398804
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,2,16,power_law_1.01,0.051718401908874514
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,2,16,power_law_1.01,0.05665919780731201
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,2,16,power_law_1.01,0.053574401140213015
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,2,16,power_law_1.01,0.055174398422241214
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,2,16,power_law_1.01,0.05883520245552063
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,2,16,power_law_1.01,0.05583999752998352
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,16,16,balanced,0.0394400010506312
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,16,16,balanced,0.03987200061480204
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,16,16,balanced,0.03987200061480204
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,2,16,power_law_1.01,0.061740797758102414
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,16,16,balanced,0.04348266621430715
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,16,16,balanced,0.04364799956480662
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,16,16,balanced,0.045567999283472695
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,2,16,power_law_1.01,0.06846719980239868
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,2,16,power_law_1.01,0.07960320115089417
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,2,16,power_law_1.01,0.09054080247879029
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,2,16,power_law_1.01,0.11971839666366577
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,2,16,power_law_1.01,0.13934719562530518
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,2,16,power_law_1.01,0.1956287980079651
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,16,16,balanced,0.04785066843032837
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,2,16,power_law_1.01,0.2598975896835327
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,16,16,balanced,0.04860266546408335
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,16,16,balanced,0.04790933430194855
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,16,16,balanced,0.04967466493447622
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,16,16,balanced,0.050026665131251015
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,16,16,balanced,0.04996799925963084
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,16,16,balanced,0.05194666484991709
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,16,16,balanced,0.05229333539803823
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,16,16,balanced,0.055999999245007835
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,16,16,balanced,0.05842133363087972
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,16,16,balanced,0.060218666990598045
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,16,16,balanced,0.06645333270231883
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,16,16,balanced,0.07046400010585785
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,16,16,balanced,0.07995733122030894
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,16,16,balanced,0.09062400460243225
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,16,16,balanced,0.11136000355084737
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,16,16,balanced,0.12991467118263245
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,16,16,balanced,0.19292799631754556
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,16,16,balanced,0.24078933397928873
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,16,16,balanced,0.3386773268381755
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,16,16,balanced,0.4330293337504069
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,32,1,power_law_1.2,0.05325440168380737
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,32,1,power_law_1.2,0.05380480289459229
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,32,1,power_law_1.2,0.055155199766159055
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,32,1,power_law_1.2,0.06420480012893677
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,32,1,power_law_1.2,0.07279360294342041
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,32,1,power_law_1.2,0.08387200236320495
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,32,1,power_law_1.2,0.1039423942565918
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,32,1,power_law_1.2,0.10444799661636353
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,32,1,power_law_1.2,0.10846079587936401
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,32,1,power_law_1.2,0.11196800470352172
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,32,1,power_law_1.2,0.11512320041656494
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,32,1,power_law_1.2,0.11900160312652588
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,32,1,power_law_1.2,0.12190079689025879
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,32,1,power_law_1.2,0.12784639596939087
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,32,1,power_law_1.2,0.13420159816741944
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,32,1,power_law_1.2,0.13728640079498292
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,32,1,power_law_1.2,0.15519360303878785
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,32,1,power_law_1.2,0.1797760009765625
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,32,1,power_law_1.2,0.202239990234375
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,32,1,power_law_1.2,0.24177279472351074
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,32,1,power_law_1.2,0.2864959955215454
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,32,1,power_law_1.2,0.3605504035949707
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,32,1,power_law_1.2,0.4404416084289551
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,32,1,power_law_1.2,0.5982975959777832
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,32,1,power_law_1.2,0.7479487895965576
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,32,1,power_law_1.2,1.0700927734375
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,32,1,power_law_1.2,1.373145580291748
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,8,2,power_law_1.2,0.05861120223999024
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,8,2,power_law_1.2,0.06190720200538635
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,8,2,power_law_1.2,0.06216959953308106
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,8,2,power_law_1.2,0.07271040081977845
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,8,2,power_law_1.2,0.08284800052642823
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,8,2,power_law_1.2,0.0887935996055603
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,8,2,power_law_1.2,0.10424319505691529
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,8,2,power_law_1.2,0.1099392056465149
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,8,2,power_law_1.2,0.11080319881439209
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,8,2,power_law_1.2,0.10955519676208496
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,8,2,power_law_1.2,0.11448320150375366
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,8,2,power_law_1.2,0.11634559631347656
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,8,2,power_law_1.2,0.11847679615020752
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,8,2,power_law_1.2,0.1230847954750061
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,8,2,power_law_1.2,0.13199360370635987
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,8,2,power_law_1.2,0.13523199558258056
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,8,2,power_law_1.2,0.1443519949913025
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,8,2,power_law_1.2,0.16095999479293824
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,8,2,power_law_1.2,0.18083200454711915
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,8,2,power_law_1.2,0.22140159606933593
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,8,4,balanced,0.03766933331886927
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,8,2,power_law_1.2,0.2545408010482788
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,8,4,balanced,0.040005333721637726
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,8,4,balanced,0.04182399809360504
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,8,4,balanced,0.04603200157483419
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,8,2,power_law_1.2,0.3377216100692749
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,8,2,power_law_1.2,0.40845441818237305
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,8,2,power_law_1.2,0.5687295913696289
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,8,2,power_law_1.2,0.7531263828277588
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,8,4,balanced,0.05986666679382324
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,8,2,power_law_1.2,1.016876792907715
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,8,4,balanced,0.08502933382987976
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,8,4,balanced,0.08599467078844707
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,8,4,balanced,0.08252800007661183
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,8,4,balanced,0.08322133123874664
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,8,4,balanced,0.08257066706816356
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,8,4,balanced,0.084906667470932
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,8,4,balanced,0.08758933345476787
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,8,2,power_law_1.2,1.323737621307373
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,8,4,balanced,0.08743466933568318
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,8,4,balanced,0.0869653324286143
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,8,4,balanced,0.09274666508038838
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,8,4,balanced,0.09508267045021057
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,8,4,balanced,0.10057600339253743
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,8,4,balanced,0.11062399546305339
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,8,4,balanced,0.1155413289864858
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,8,4,balanced,0.13705066839853922
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,8,4,balanced,0.14455999930699667
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,8,4,balanced,0.18214933077494302
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,8,4,balanced,0.2021333376566569
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,8,4,balanced,0.27855465809504193
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,8,4,balanced,0.33910401662190753
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,8,4,balanced,0.4848266839981079
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,8,4,balanced,0.6150026718775431
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,16,4,power_law_1.01,0.04581120014190674
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,16,4,power_law_1.01,0.048089599609375
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,16,4,power_law_1.01,0.04689919948577881
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,16,4,power_law_1.01,0.05377920269966126
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,16,4,power_law_1.01,0.05804799795150757
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,16,4,power_law_1.01,0.05783680081367493
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,16,4,power_law_1.01,0.06412799954414368
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,16,4,power_law_1.01,0.06496639847755432
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,16,4,power_law_1.01,0.06474879980087281
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,16,4,power_law_1.01,0.06774399876594543
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,16,4,power_law_1.01,0.06959999799728393
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,16,4,power_law_1.01,0.06839039921760559
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,16,4,power_law_1.01,0.07251840233802795
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,16,4,power_law_1.01,0.07480319738388061
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,16,4,power_law_1.01,0.08099200129508972
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,16,4,power_law_1.01,0.08246399760246277
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,16,4,power_law_1.01,0.08794879913330078
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,16,4,power_law_1.01,0.09818239808082581
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,16,4,power_law_1.01,0.10677119493484497
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,16,4,power_law_1.01,0.12825599908828736
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,16,4,power_law_1.01,0.14480639696121217
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,16,4,power_law_1.01,0.18403199911117554
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,16,4,power_law_1.01,0.21857280731201173
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,16,4,power_law_1.01,0.2976576089859009
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,16,4,power_law_1.01,0.39450879096984864
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,16,4,power_law_1.01,0.5647744178771973
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,16,4,power_law_1.01,0.6514111995697022
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,1,128,power_law_1.2,0.040966400504112245
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,1,128,power_law_1.2,0.04126079976558685
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,1,128,power_law_1.2,0.04131839871406555
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,1,128,power_law_1.2,0.044761601090431216
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,1,128,power_law_1.2,0.04583680033683777
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,1,128,power_law_1.2,0.04588800072669983
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,1,128,power_law_1.2,0.04752640128135681
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,1,128,power_law_1.2,0.04892799854278564
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,1,128,power_law_1.2,0.04935680031776428
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,1,128,power_law_1.2,0.05047680139541626
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,1,128,power_law_1.2,0.05431039929389954
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,1,128,power_law_1.2,0.05601279735565186
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,1,128,power_law_1.2,0.051583999395370485
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,1,128,power_law_1.2,0.05562880039215088
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,1,128,power_law_1.2,0.06188160181045532
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,2,64,balanced,0.042026668787002563
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,2,64,balanced,0.039674667020638786
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,1,128,power_law_1.2,0.06314240097999572
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,2,64,balanced,0.03782399992148081
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,2,64,balanced,0.039706667264302574
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,2,64,balanced,0.043866669138272606
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,1,128,power_law_1.2,0.06695680022239685
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,2,64,balanced,0.04403733213742574
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,2,64,balanced,0.0458186666170756
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,2,64,balanced,0.04599999884764353
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,2,64,balanced,0.04602666695912679
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,2,64,balanced,0.045754666129748024
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,2,64,balanced,0.04826133449872335
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,2,64,balanced,0.047925333182017006
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,2,64,balanced,0.04791999856630961
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,2,64,balanced,0.05012266834576925
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,2,64,balanced,0.05442666510740916
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,2,64,balanced,0.054192001620928444
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,2,64,balanced,0.05835199852784475
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,2,64,balanced,0.06221333146095276
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,2,64,balanced,0.06451733410358429
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,2,64,balanced,0.0746506651242574
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,2,64,balanced,0.08319999774297078
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,2,64,balanced,0.09919466574986775
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,2,64,balanced,0.12388267119725545
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,2,64,balanced,0.17300800482432047
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,2,64,balanced,0.21313067277272543
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,2,64,balanced,0.296234667301178
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,2,64,balanced,0.3699146509170532
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,1,128,power_law_1.2,0.07643520236015319
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,1,128,power_law_1.2,0.08587520122528076
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,1,128,power_law_1.2,0.11011199951171875
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,1,128,power_law_1.2,0.12802560329437257
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,1,128,power_law_1.2,0.18112640380859374
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,1,128,power_law_1.2,0.23420159816741942
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,1,128,power_law_1.2,0.3348479986190796
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,1,128,power_law_1.2,0.4387199878692627
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,1,128,power_law_1.2,0.732857608795166
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,1,128,power_law_1.2,1.1038784027099608
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,8,8,power_law_1.2,0.04748800098896026
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,8,8,power_law_1.2,0.05200639963150024
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,8,8,power_law_1.2,0.05249279737472534
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,8,8,power_law_1.2,0.0521664023399353
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,8,8,power_law_1.2,0.06097919940948486
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,8,8,power_law_1.2,0.06000000238418579
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,8,8,power_law_1.2,0.059008002281188965
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,8,8,power_law_1.2,0.06214399933815003
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,8,8,power_law_1.2,0.061766397953033444
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,8,8,power_law_1.2,0.06120319962501526
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,8,8,power_law_1.2,0.06575359702110291
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,8,8,power_law_1.2,0.06760960221290588
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,8,8,power_law_1.2,0.06821759939193725
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,8,8,power_law_1.2,0.08120319843292237
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,8,8,power_law_1.2,0.2804095983505249
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,8,8,power_law_1.2,0.283027195930481
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,8,8,power_law_1.2,0.08140159845352173
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,8,8,power_law_1.2,0.09423360228538513
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,8,8,power_law_1.2,0.0960640013217926
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,8,8,power_law_1.2,0.1089792013168335
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,8,8,power_law_1.2,0.12275840044021606
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,8,8,power_law_1.2,0.15494400262832642
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,8,8,power_law_1.2,0.19583359956741334
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,8,8,power_law_1.2,0.244051194190979
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,8,8,power_law_1.2,0.304966402053833
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,8,8,power_law_1.2,0.41290879249572754
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,8,8,power_law_1.2,0.6119999885559082
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,1,64,power_law_1.2,0.024851199984550477
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,1,64,power_law_1.2,0.022495999932289124
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,1,64,power_law_1.2,0.021875199675559998
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,1,64,power_law_1.2,0.021939200162887574
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,1,64,power_law_1.2,0.023974399268627166
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,1,64,power_law_1.2,0.028441599011421202
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,1,64,power_law_1.2,0.039103999733924866
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,1,64,power_law_1.2,0.039103999733924866
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,1,64,power_law_1.2,0.043084800243377686
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,1,64,power_law_1.2,0.04304639995098114
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,1,64,power_law_1.2,0.04247680008411407
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,1,64,power_law_1.2,0.042630401253700254
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,1,64,power_law_1.2,0.041503998637199405
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,1,64,power_law_1.2,0.03857919871807099
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,1,64,power_law_1.2,0.03717760145664215
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,1,64,power_law_1.2,0.03797760009765625
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,1,64,power_law_1.2,0.03919360041618347
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,1,64,power_law_1.2,0.0411327987909317
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,1,64,power_law_1.2,0.04684799909591675
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,1,64,power_law_1.2,0.05607680082321167
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,1,64,power_law_1.2,0.05894399881362915
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,1,64,power_law_1.2,0.07957119941711426
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,1,64,power_law_1.2,0.10144640207290649
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,1,64,power_law_1.2,0.12705279588699342
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,1,64,power_law_1.2,0.17144320011138917
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,1,64,power_law_1.2,0.24267520904541015
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,1,64,power_law_1.2,0.3475519895553589
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,2,4,balanced,0.05046399931112925
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,2,4,balanced,0.05624533196290334
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,2,4,balanced,0.06412800153096516
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,2,4,balanced,0.07698133091131847
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,2,4,balanced,0.11006399989128113
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,2,4,balanced,0.16218133767445883
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,2,4,balanced,0.15242133537928262
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,2,4,balanced,0.14963199694951376
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,2,4,balanced,0.14588800072669983
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,2,4,balanced,0.14492799838383993
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,2,4,balanced,0.1453386644522349
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,2,4,balanced,0.14477866888046265
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,2,4,balanced,0.14495999614397684
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,2,4,balanced,0.154639999071757
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,2,4,balanced,0.14918933312098184
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,2,4,balanced,0.14860799908638
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,2,4,balanced,0.16262933611869812
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,2,4,balanced,0.1662613352139791
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,2,4,balanced,0.1753973364830017
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,2,4,balanced,0.18731200695037842
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,2,4,balanced,0.20549867550532022
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,2,4,balanced,0.240447998046875
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,2,4,balanced,0.27511467536290485
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,2,4,balanced,0.4121973514556885
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,2,4,balanced,0.4822453260421753
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,2,4,balanced,0.6896639664967855
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,2,4,balanced,0.8835573196411133
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,2,1,balanced,0.08747200171152751
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,2,1,balanced,0.10088533163070679
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,2,1,balanced,0.1272533337275187
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,2,1,balanced,0.18043200174967447
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,2,1,balanced,0.26868265867233276
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,2,1,balanced,0.43348264694213867
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,2,1,balanced,0.4236053228378296
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,2,1,balanced,0.40694932142893475
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,2,1,balanced,0.4096533457438151
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,2,1,balanced,0.4007093509038289
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,2,1,balanced,0.39955198764801025
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,2,1,balanced,0.4012746810913086
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,2,1,balanced,0.40298132101694745
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,2,1,balanced,0.41265066464742023
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,2,1,balanced,0.4129813512166341
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,2,1,balanced,0.41529067357381183
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,2,1,balanced,0.4375679890314738
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,2,1,balanced,0.4570506811141968
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,2,1,balanced,0.4871679941813151
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,2,1,balanced,0.531269351641337
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,2,1,balanced,0.5837653477986654
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,2,1,balanced,0.6932960351308187
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,2,1,balanced,0.8152746359507242
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,2,1,balanced,1.2576106389363606
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,2,1,balanced,1.473855972290039
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,2,1,balanced,2.194101333618164
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,2,1,balanced,2.816399892171224
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,8,1,balanced,0.07829866806666057
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,8,1,balanced,0.08106133341789246
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,8,1,balanced,0.08486933509508769
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,8,1,balanced,0.09970666964848836
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,8,1,balanced,0.13514133294423422
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,8,1,balanced,0.18404799699783325
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,8,1,balanced,0.18805332978566489
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,8,1,balanced,0.19004799922307333
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,8,1,balanced,0.19109867016474405
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,8,1,balanced,0.19328532616297403
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,8,1,balanced,0.19870932896931967
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,8,1,balanced,0.20295999447504678
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,8,1,balanced,0.20374399423599243
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,8,1,balanced,0.20997865994771323
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,8,1,balanced,0.2137813369433085
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,8,1,balanced,0.22127467393875122
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,8,1,balanced,0.2347573240598043
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,8,1,balanced,0.26401599248250324
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,8,1,balanced,0.2865546743075053
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,8,1,balanced,0.34408001104990643
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,8,1,balanced,0.4003146489461263
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,8,1,balanced,0.5182506640752157
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,8,1,balanced,0.6334986686706543
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,8,1,balanced,0.9278720219930013
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,8,1,balanced,1.162453333536784
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,8,1,balanced,1.713823954264323
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,8,1,balanced,2.2411680221557617
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,16,1,balanced,0.05034666756788889
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,16,1,balanced,0.05230933427810669
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,16,1,balanced,0.05439466734727224
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,16,1,balanced,0.06432533264160156
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,16,1,balanced,0.0904960036277771
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,16,1,balanced,0.11578133702278137
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,16,1,balanced,0.11877866586049397
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,16,1,balanced,0.12158933281898499
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,16,1,balanced,0.12343999743461609
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,16,1,balanced,0.12402666608492534
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,16,1,balanced,0.12716799974441528
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,16,1,balanced,0.13054933150609335
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,16,1,balanced,0.1332480013370514
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,16,1,balanced,0.1383626659711202
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,16,1,balanced,0.14357333381970724
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,16,1,balanced,0.1504533290863037
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,16,1,balanced,0.16115199526151022
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,16,1,balanced,0.18851200739542642
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,16,1,balanced,0.20361600319544473
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,16,1,balanced,0.25082133213679
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,16,1,balanced,0.2961546579996745
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,16,1,balanced,0.4203146696090698
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,16,1,balanced,0.49773867925008136
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,16,1,balanced,0.737061341603597
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,16,1,balanced,0.9184959729512533
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,16,1,balanced,1.3505973815917969
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,16,1,balanced,1.7707734107971191
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,1,16,power_law_1.2,0.025286400318145753
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,1,16,power_law_1.2,0.03852159976959228
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,1,16,power_law_1.2,0.03258880078792572
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,1,16,power_law_1.2,0.03718400001525879
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,1,16,power_law_1.2,0.03792639970779419
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,1,16,power_law_1.2,0.029836800694465638
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,1,16,power_law_1.2,0.03625600039958954
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,1,16,power_law_1.2,0.036934399604797365
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,1,16,power_law_1.2,0.037376001477241516
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,1,16,power_law_1.2,0.03768959939479828
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,1,16,power_law_1.2,0.03827199935913086
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,1,16,power_law_1.2,0.03866879940032959
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,1,16,power_law_1.2,0.04131839871406555
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,1,16,power_law_1.2,0.04955520033836365
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,1,16,power_law_1.2,0.05061119794845581
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,1,16,power_law_1.2,0.0506879985332489
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,1,16,power_law_1.2,0.07581440210342408
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,1,16,power_law_1.2,0.08536319732666016
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,1,16,power_law_1.2,0.09609599709510804
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,1,16,power_law_1.2,0.12575360536575317
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,1,16,power_law_1.2,0.08564479947090149
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,1,16,power_law_1.2,0.10821759700775146
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,1,16,power_law_1.2,0.12132480144500732
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,1,16,power_law_1.2,0.16561280488967894
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,1,16,power_law_1.2,0.2051392078399658
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,1,16,power_law_1.2,0.3108223915100098
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,1,16,power_law_1.2,0.38638720512390134
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,8,32,power_law_1.2,0.04485760033130646
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,8,32,power_law_1.2,0.04089600145816803
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,8,32,power_law_1.2,0.04273920059204102
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,8,32,power_law_1.2,0.041254401206970215
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,8,32,power_law_1.2,0.04101119935512543
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,8,32,power_law_1.2,0.04311679899692535
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,8,32,power_law_1.2,0.043635201454162595
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,8,32,power_law_1.2,0.04477440118789673
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,8,32,power_law_1.2,0.044915199279785156
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,8,32,power_law_1.2,0.04598399996757507
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,8,32,power_law_1.2,0.04538240134716034
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,8,32,power_law_1.2,0.04522239863872528
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,8,32,power_law_1.2,0.04704639911651611
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,8,32,power_law_1.2,0.04711039960384369
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,8,32,power_law_1.2,0.05151360034942627
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,8,32,power_law_1.2,0.05317760109901428
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,8,32,power_law_1.2,0.055264002084732054
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,8,32,power_law_1.2,0.061375999450683595
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,8,32,power_law_1.2,0.06595199704170226
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,8,32,power_law_1.2,0.07767040133476258
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,8,32,power_law_1.2,0.08700799942016602
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,8,32,power_law_1.2,0.11160320043563843
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,8,32,power_law_1.2,0.12863359451293946
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,8,32,power_law_1.2,0.1816831946372986
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,8,32,power_law_1.2,0.2225343942642212
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,8,32,power_law_1.2,0.34104959964752196
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,8,32,power_law_1.2,0.4343616008758545
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,1,2,power_law_1.2,0.0877888023853302
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,1,2,power_law_1.2,0.11484800577163697
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,1,2,power_law_1.2,0.14087680578231812
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,1,2,power_law_1.2,0.17328640222549438
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,1,2,power_law_1.2,0.20779519081115722
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,1,2,power_law_1.2,0.231878399848938
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,1,2,power_law_1.2,0.3007999897003174
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,1,2,power_law_1.2,0.3208832025527954
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,1,2,power_law_1.2,0.31706240177154543
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,1,2,power_law_1.2,0.3210367918014526
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,1,2,power_law_1.2,0.3486464023590088
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,2,4,power_law_1.2,0.06945279836654664
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,1,2,power_law_1.2,0.3379199981689453
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,2,4,power_law_1.2,0.08393599987030029
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,1,2,power_law_1.2,0.3360703945159912
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,2,4,power_law_1.2,0.08647040128707886
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,1,2,power_law_1.2,0.37029759883880614
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,1,2,power_law_1.2,0.3693824052810669
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,2,4,power_law_1.2,0.1023743987083435
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,1,2,power_law_1.2,0.3775871992111206
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,2,4,power_law_1.2,0.10911999940872193
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,1,2,power_law_1.2,0.38876159191131593
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,1,2,power_law_1.2,0.415231990814209
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,2,4,power_law_1.2,0.12480000257492066
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,1,2,power_law_1.2,0.4521279811859131
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,2,4,power_law_1.2,0.13100800514221192
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,1,2,power_law_1.2,0.5063231945037842
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,2,4,power_law_1.2,0.13616640567779542
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,1,2,power_law_1.2,0.5509247779846191
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,2,4,power_law_1.2,0.1398911952972412
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,1,2,power_law_1.2,0.697753620147705
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,2,4,power_law_1.2,0.13717119693756102
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,1,2,power_law_1.2,0.7856063842773438
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,2,4,power_law_1.2,0.14464000463485718
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,1,2,power_law_1.2,1.0598208427429199
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,2,4,power_law_1.2,0.14693119525909423
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,2,4,power_law_1.2,0.14406399726867675
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,1,2,power_law_1.2,1.297036838531494
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,2,4,power_law_1.2,0.14914560317993164
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,2,4,power_law_1.2,0.15047680139541625
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,1,2,power_law_1.2,1.7221439361572266
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,2,4,power_law_1.2,0.15535999536514283
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,2,4,power_law_1.2,0.16013439893722534
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,1,2,power_law_1.2,2.1505088806152344
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,2,4,power_law_1.2,0.1735360026359558
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,2,4,power_law_1.2,0.19189120531082154
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,2,4,power_law_1.2,0.22614400386810302
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,2,4,power_law_1.2,0.2521087884902954
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,2,4,power_law_1.2,0.2995584011077881
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,2,4,power_law_1.2,0.3709952116012573
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,2,4,power_law_1.2,0.5444928169250488
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,2,4,power_law_1.2,0.7729152202606201
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,2,4,power_law_1.2,1.05863676071167
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,2,4,power_law_1.2,1.2592191696166992
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,2,8,balanced,0.020992000897725422
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,2,8,balanced,0.02126399924357732
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,2,8,balanced,0.023232000569502514
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,2,8,balanced,0.023311999936898548
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,2,8,balanced,0.02826133370399475
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,2,8,balanced,0.043354665239652
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,2,8,balanced,0.0459199994802475
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,2,8,balanced,0.04618666569391886
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,2,8,balanced,0.05226133267084757
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,2,8,balanced,0.052144000927607216
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,2,8,balanced,0.05367999772230784
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,2,8,balanced,0.07189333438873291
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,2,8,balanced,0.06781333188215892
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,2,8,balanced,0.06806399921576183
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,2,8,balanced,0.07860800127188365
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,2,8,balanced,0.07830933233102162
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,2,8,balanced,0.08271466692288716
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,2,8,balanced,0.09867733716964722
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,2,8,balanced,0.09992532928784688
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,2,8,balanced,0.11044800281524658
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,2,8,balanced,0.12211199601491292
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,2,8,balanced,0.13697600364685059
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,2,8,balanced,0.15738133589426676
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,2,8,balanced,0.18363199631373087
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,2,8,balanced,0.21421333154042563
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,2,8,balanced,0.33452800909678143
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,2,8,balanced,0.39160533746083576
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,1,128,power_law_1.01,0.053529602289199826
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,1,128,power_law_1.01,0.05389440059661865
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,1,128,power_law_1.01,0.05235840082168579
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,1,128,power_law_1.01,0.05559039711952209
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,1,128,power_law_1.01,0.05392000079154968
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,1,128,power_law_1.01,0.054630398750305176
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,1,128,power_law_1.01,0.054771202802658084
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,1,128,power_law_1.01,0.05492479801177978
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,1,128,power_law_1.01,0.05562880039215088
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,1,128,power_law_1.01,0.05557119846343994
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,1,128,power_law_1.01,0.055878400802612305
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,1,128,power_law_1.01,0.056006401777267456
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,1,128,power_law_1.01,0.05691519975662231
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,1,128,power_law_1.01,0.05737599730491638
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,1,128,power_law_1.01,0.06056320071220398
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,1,128,power_law_1.01,0.06117759943008423
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,1,128,power_law_1.01,0.06327040195465088
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,1,128,power_law_1.01,0.06996480226516724
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,1,128,power_law_1.01,0.07592319846153259
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,1,128,power_law_1.01,0.08687360286712646
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,1,128,power_law_1.01,0.09699199795722961
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,1,128,power_law_1.01,0.1210368037223816
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,1,128,power_law_1.01,0.1419584035873413
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,1,128,power_law_1.01,0.1823807954788208
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,1,128,power_law_1.01,0.24399359226226808
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,1,128,power_law_1.01,0.3377664089202881
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,1,128,power_law_1.01,0.43546881675720217
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,4,64,balanced,0.04740266501903534
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,4,64,balanced,0.045647998650868736
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,4,64,balanced,0.04371733466784159
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,4,64,balanced,0.0459146648645401
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,4,64,balanced,0.04971200227737427
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,4,64,balanced,0.04796266555786133
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,4,64,balanced,0.049866666396458946
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,4,64,balanced,0.0498879998922348
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,4,64,balanced,0.05100266635417938
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,4,64,balanced,0.04967466493447622
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,4,64,balanced,0.05186666548252106
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,4,64,balanced,0.051781331499417625
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,4,64,balanced,0.05161066850026449
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,4,64,balanced,0.05382933219273885
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,4,64,balanced,0.05706666906674703
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,4,64,balanced,0.057802667220433555
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,4,64,balanced,0.06102400024731954
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,4,64,balanced,0.06603200236956279
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,4,64,balanced,0.06822933256626129
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,4,64,balanced,0.07622399926185608
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,4,64,balanced,0.08257066706816356
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,4,64,balanced,0.09683199723561604
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,4,64,balanced,0.11129599809646606
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,4,64,balanced,0.15044266978899637
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,4,64,balanced,0.18179200092951456
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,4,64,balanced,0.24843200047810873
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,4,64,balanced,0.3072959979375203
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,1,64,power_law_1.01,0.02248319983482361
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,1,64,power_law_1.01,0.021932800114154816
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,1,64,power_law_1.01,0.020844799280166627
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,1,64,power_law_1.01,0.021055999398231506
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,1,64,power_law_1.01,0.023705600202083586
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,1,64,power_law_1.01,0.028697600960731505
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,1,64,power_law_1.01,0.03866879940032959
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,1,64,power_law_1.01,0.037990400195121767
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,1,64,power_law_1.01,0.042131200432777405
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,1,64,power_law_1.01,0.04175359904766083
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,1,64,power_law_1.01,0.041433599591255185
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,1,64,power_law_1.01,0.041971200704574586
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,1,64,power_law_1.01,0.040166398882865904
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,1,64,power_law_1.01,0.04058879911899567
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,1,64,power_law_1.01,0.03900800049304962
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,1,64,power_law_1.01,0.03723520040512085
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,1,64,power_law_1.01,0.039647999405860904
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,1,64,power_law_1.01,0.037945601344108584
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,1,64,power_law_1.01,0.04497919976711273
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,1,64,power_law_1.01,0.05297920107841492
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,1,64,power_law_1.01,0.05772160291671753
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,1,64,power_law_1.01,0.07294719815254211
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,1,64,power_law_1.01,0.0934719979763031
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,1,64,power_law_1.01,0.12344959974288941
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,1,64,power_law_1.01,0.157478404045105
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,1,64,power_law_1.01,0.21227519512176513
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,1,64,power_law_1.01,0.289574408531189
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,1,128,power_law_1.2,0.05996800065040588
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,1,128,power_law_1.2,0.06136959791183472
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,1,128,power_law_1.2,0.06147199869155884
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,1,128,power_law_1.2,0.06059520244598389
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,1,128,power_law_1.2,0.05720319747924805
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,1,128,power_law_1.2,0.05911679863929749
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,1,128,power_law_1.2,0.05799040198326111
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,1,128,power_law_1.2,0.060031998157501223
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,1,128,power_law_1.2,0.05898879766464234
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,1,128,power_law_1.2,0.059654402732849124
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,1,128,power_law_1.2,0.061267197132110596
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,1,128,power_law_1.2,0.06266239881515503
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,1,128,power_law_1.2,0.0630079984664917
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,1,128,power_law_1.2,0.0605184018611908
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,1,128,power_law_1.2,0.06384000182151794
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,1,128,power_law_1.2,0.0636031985282898
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,1,128,power_law_1.2,0.06659839749336242
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,1,128,power_law_1.2,0.07338879704475403
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,1,128,power_law_1.2,0.08134400248527526
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,1,128,power_law_1.2,0.09857280254364013
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,1,128,power_law_1.2,0.11623680591583252
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,1,128,power_law_1.2,0.14028799533843994
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,1,128,power_law_1.2,0.1673087954521179
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,1,128,power_law_1.2,0.2390592098236084
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,1,128,power_law_1.2,0.2908031940460205
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,1,128,power_law_1.2,0.4477759838104248
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,1,128,power_law_1.2,0.5814655780792236
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,32,8,power_law_1.2,0.04390400052070618
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,32,8,power_law_1.2,0.045414400100708005
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,32,8,power_law_1.2,0.04100480079650879
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,32,8,power_law_1.2,0.04423680007457733
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,32,8,power_law_1.2,0.04595839977264404
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,32,8,power_law_1.2,0.04514560103416443
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,32,8,power_law_1.2,0.04747520089149475
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,32,8,power_law_1.2,0.04755200147628784
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,32,8,power_law_1.2,0.04709759950637817
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,32,8,power_law_1.2,0.0484607994556427
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,32,8,power_law_1.2,0.04842880070209503
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,32,8,power_law_1.2,0.05012480020523071
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,32,8,power_law_1.2,0.05144960284233093
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,32,8,power_law_1.2,0.05178239941596985
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,32,8,power_law_1.2,0.05551360249519348
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,32,8,power_law_1.2,0.058271998167037965
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,32,8,power_law_1.2,0.061427199840545656
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,32,8,power_law_1.2,0.06814720034599304
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,32,8,power_law_1.2,0.07492480278015137
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,32,8,power_law_1.2,0.08976640105247498
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,32,8,power_law_1.2,0.10501760244369507
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,32,8,power_law_1.2,0.13642879724502563
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,32,8,power_law_1.2,0.1730239987373352
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,32,8,power_law_1.2,0.213318395614624
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,32,8,power_law_1.2,0.31780478954315183
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,32,8,power_law_1.2,0.5001215934753418
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,32,8,power_law_1.2,0.6022079944610595
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,8,1,balanced,0.01966399947802226
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,8,1,balanced,0.020432000358899433
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,8,1,balanced,0.021482666333516438
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,8,1,balanced,0.025194667279720306
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,8,1,balanced,0.031498665610949196
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,8,1,balanced,0.04799466828505198
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,8,1,balanced,0.05070933202902476
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,8,1,balanced,0.05223466455936432
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,8,1,balanced,0.052576000491778054
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,8,1,balanced,0.055567999680837
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,8,1,balanced,0.05793066819508871
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,8,1,balanced,0.05989866455396017
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,8,1,balanced,0.06239999830722809
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,8,1,balanced,0.0684799998998642
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,8,1,balanced,0.06706133484840393
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,8,1,balanced,0.07055999835332234
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,8,1,balanced,0.07804800073305766
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,8,1,balanced,0.07602666815121968
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,8,1,balanced,0.08479467034339905
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,8,1,balanced,0.09915199875831604
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,8,1,balanced,0.11055466532707214
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,8,1,balanced,0.161189337571462
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,8,1,balanced,0.18833067019780478
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,8,1,balanced,0.19646400213241577
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,8,1,balanced,0.20986666282018027
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,8,1,balanced,0.3540639877319336
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,8,1,balanced,0.37836798032124835
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,8,16,power_law_1.01,0.04287360012531281
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,8,16,power_law_1.01,0.04609279930591583
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,8,16,power_law_1.01,0.04424319863319397
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,8,16,power_law_1.01,0.04545280039310455
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,8,16,power_law_1.01,0.04625920057296753
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,8,16,power_law_1.01,0.04527359902858734
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,8,16,power_law_1.01,0.04607360064983368
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,8,16,power_law_1.01,0.04670720100402832
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,8,16,power_law_1.01,0.0468095988035202
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,8,16,power_law_1.01,0.047628799080848695
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,8,16,power_law_1.01,0.05016319751739502
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,8,16,power_law_1.01,0.05271040201187134
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,8,16,power_law_1.01,0.05382400155067444
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,8,16,power_law_1.01,0.056620800495147706
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,8,16,power_law_1.01,0.05922560095787048
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,8,16,power_law_1.01,0.061843198537826535
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,8,16,power_law_1.01,0.06599040031433105
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,8,16,power_law_1.01,0.07370240092277527
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,8,16,power_law_1.01,0.07285119891166687
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,8,16,power_law_1.01,0.08789759874343872
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,8,16,power_law_1.01,0.10078719854354859
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,8,16,power_law_1.01,0.12319999933242798
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,8,16,power_law_1.01,0.14304640293121337
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,8,16,power_law_1.01,0.18888959884643555
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,8,16,power_law_1.01,0.23624320030212403
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,8,16,power_law_1.01,0.31899518966674806
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,8,16,power_law_1.01,0.4212224006652832
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,2,16,power_law_1.2,0.04385280013084412
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,2,16,power_law_1.2,0.06023039817810059
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,2,16,power_law_1.2,0.04954879879951477
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,2,16,power_law_1.2,0.05911039710044861
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,2,16,power_law_1.2,0.06041600108146668
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,2,16,power_law_1.2,0.05111680030822754
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,2,16,power_law_1.2,0.05311359763145447
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,2,16,power_law_1.2,0.05656319856643677
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,2,16,power_law_1.2,0.05740799903869629
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,2,16,power_law_1.2,0.053990399837493895
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,2,16,power_law_1.2,0.05735039710998535
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,2,16,power_law_1.2,0.060236799716949466
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,2,16,power_law_1.2,0.06291840076446534
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,2,16,power_law_1.2,0.0656063973903656
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,2,16,power_law_1.2,0.07461119890213012
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,2,16,power_law_1.2,0.07740160226821899
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,2,16,power_law_1.2,0.08015999794006348
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,2,16,power_law_1.2,0.09422720074653626
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,2,16,power_law_1.2,0.09919999837875366
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,2,16,power_law_1.2,0.1180799961090088
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,2,16,power_law_1.2,0.13320319652557372
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,2,16,power_law_1.2,0.17076480388641357
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,2,16,power_law_1.2,0.20223360061645507
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,2,16,power_law_1.2,0.2762176036834717
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,2,16,power_law_1.2,0.3395967960357666
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,2,16,power_law_1.2,0.503872013092041
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,2,16,power_law_1.2,0.6438720226287842
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,4,32,power_law_1.2,0.01809920072555542
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,4,32,power_law_1.2,0.01775359958410263
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,4,32,power_law_1.2,0.01804800033569336
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,4,32,power_law_1.2,0.017798399925231932
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,4,32,power_law_1.2,0.01897599995136261
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,4,32,power_law_1.2,0.019782400131225585
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,4,32,power_law_1.2,0.02240640074014664
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,4,32,power_law_1.2,0.02319999933242798
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,4,32,power_law_1.2,0.03610239923000336
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,4,32,power_law_1.2,0.03611519932746887
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,4,32,power_law_1.2,0.03688960075378418
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,4,32,power_law_1.2,0.03437440097332001
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,4,32,power_law_1.2,0.03452160060405731
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,4,32,power_law_1.2,0.03404799997806549
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,4,32,power_law_1.2,0.034272000193595886
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,4,32,power_law_1.2,0.035071998834609985
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,4,32,power_law_1.2,0.03608959913253784
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,4,32,power_law_1.2,0.0380160003900528
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,4,32,power_law_1.2,0.039878401160240176
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,4,32,power_law_1.2,0.043968001008033754
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,4,32,power_law_1.2,0.049369600415229795
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,4,32,power_law_1.2,0.06247680187225342
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,4,32,power_law_1.2,0.07593600153923034
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,4,32,power_law_1.2,0.09338880181312562
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,4,32,power_law_1.2,0.12080639600753784
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,4,32,power_law_1.2,0.16293760538101196
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,4,32,power_law_1.2,0.21130878925323487
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,16,16,power_law_1.01,0.0423552006483078
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,16,16,power_law_1.01,0.048172798752784726
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,16,16,power_law_1.01,0.04490880072116852
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,16,16,power_law_1.01,0.04631040096282959
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,16,16,power_law_1.01,0.04657280147075653
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,16,16,power_law_1.01,0.046758401393890384
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,16,16,power_law_1.01,0.04718720018863678
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,16,16,power_law_1.01,0.04805119931697845
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,16,16,power_law_1.01,0.04855040013790131
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,16,16,power_law_1.01,0.048902401328086854
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,16,16,power_law_1.01,0.04968959987163544
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,16,16,power_law_1.01,0.05089920163154602
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,16,16,power_law_1.01,0.05189120173454285
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,16,16,power_law_1.01,0.05511040091514587
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,16,16,power_law_1.01,0.0585536003112793
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,16,16,power_law_1.01,0.06096000075340271
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,16,16,power_law_1.01,0.0630016028881073
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,16,16,power_law_1.01,0.07112320065498352
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,16,16,power_law_1.01,0.07440000176429748
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,16,16,power_law_1.01,0.09008640050888062
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,16,16,power_law_1.01,0.0982591986656189
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,16,16,power_law_1.01,0.13116159439086914
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,16,16,power_law_1.01,0.16021759510040284
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,16,16,power_law_1.01,0.22061440944671631
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,16,16,power_law_1.01,0.2821439981460571
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,16,16,power_law_1.01,0.39431679248809814
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,16,16,power_law_1.01,0.517248010635376
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,32,1,balanced,0.05041066805521647
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,32,1,balanced,0.05021866659323374
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,32,1,balanced,0.05247466762860616
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,32,1,balanced,0.06211733321348826
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,32,1,balanced,0.08494933446248372
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,32,1,balanced,0.12205866972605388
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,32,1,balanced,0.12249066432317098
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,32,1,balanced,0.12377599875132243
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,32,1,balanced,0.12738666931788126
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,32,1,balanced,0.12782399853070578
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,32,1,balanced,0.12821867068608603
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,32,1,balanced,0.13160000244776407
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,32,1,balanced,0.13210133711496988
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,32,1,balanced,0.13402666648228964
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,32,1,balanced,0.14040000240008035
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,32,1,balanced,0.1410719950993856
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,32,1,balanced,0.14623467127482095
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,32,1,balanced,0.1649066706498464
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,32,1,balanced,0.1785866618156433
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,32,1,balanced,0.21132266521453857
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,32,1,balanced,0.2455679972966512
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,32,1,balanced,0.3591039975484212
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,32,1,balanced,0.4012426535288493
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,32,1,balanced,0.5901866753896078
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,32,1,balanced,0.7264426549275717
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,32,1,balanced,1.0576213200887044
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,32,1,balanced,1.3851253191630046
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,4,1,power_law_1.2,0.103603196144104
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,4,1,power_law_1.2,0.12008320093154908
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,4,1,power_law_1.2,0.1485759973526001
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,4,1,power_law_1.2,0.16289279460906983
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,4,1,power_law_1.2,0.17404160499572754
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,4,1,power_law_1.2,0.19241600036621093
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,4,1,power_law_1.2,0.23761279582977296
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,4,1,power_law_1.2,0.24216320514678955
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,4,1,power_law_1.2,0.24730238914489747
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,4,1,power_law_1.2,0.25189759731292727
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,4,1,power_law_1.2,0.26124799251556396
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,4,1,power_law_1.2,0.27083520889282225
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,4,1,power_law_1.2,0.276307201385498
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,4,1,power_law_1.2,0.2826751947402954
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,4,1,power_law_1.2,0.2838399887084961
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,4,1,power_law_1.2,0.2916928052902222
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,4,1,power_law_1.2,0.3027712106704712
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,4,1,power_law_1.2,0.3358975887298584
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,4,1,power_law_1.2,0.37204480171203613
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,4,1,power_law_1.2,0.4431424140930176
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,4,1,power_law_1.2,0.5244863986968994
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,4,1,power_law_1.2,0.6423615932464599
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,4,1,power_law_1.2,0.7810304164886475
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,4,1,power_law_1.2,1.061843204498291
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,4,1,power_law_1.2,1.3426048278808593
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,4,1,power_law_1.2,1.8818880081176759
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,4,1,power_law_1.2,2.490208053588867
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,2,128,power_law_1.2,0.03898879885673523
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,2,128,power_law_1.2,0.03935999870300293
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,2,128,power_law_1.2,0.038924801349639895
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,2,128,power_law_1.2,0.04302720129489899
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,2,128,power_law_1.2,0.04382719993591309
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,2,128,power_law_1.2,0.044998401403427125
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,2,128,power_law_1.2,0.04683519899845123
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,2,128,power_law_1.2,0.04684160053730011
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,2,128,power_law_1.2,0.04684160053730011
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,2,128,power_law_1.2,0.0486272007226944
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,2,128,power_law_1.2,0.049465599656105044
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,2,128,power_law_1.2,0.05076479911804199
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,2,128,power_law_1.2,0.04984959959983826
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,2,128,power_law_1.2,0.05422719717025757
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,2,128,power_law_1.2,0.06074240207672119
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,2,128,power_law_1.2,0.05919359922409058
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,2,128,power_law_1.2,0.06570240259170532
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,2,128,power_law_1.2,0.0747327983379364
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,2,128,power_law_1.2,0.08173440098762512
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,2,128,power_law_1.2,0.09918720126152039
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,2,128,power_law_1.2,0.12449920177459717
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,2,128,power_law_1.2,0.16338560581207276
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,2,128,power_law_1.2,0.216211199760437
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,2,128,power_law_1.2,0.29139840602874756
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,2,128,power_law_1.2,0.3656512022018433
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,2,128,power_law_1.2,0.6580927848815918
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,2,128,power_law_1.2,0.9360447883605957
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,4,16,power_law_1.01,0.045977601408958436
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,4,16,power_law_1.01,0.058278399705886844
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,4,16,power_law_1.01,0.05581439733505249
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,4,16,power_law_1.01,0.06114559769630432
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,4,16,power_law_1.01,0.05716480016708374
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,4,16,power_law_1.01,0.05629439949989319
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,4,16,power_law_1.01,0.05989760160446167
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,4,16,power_law_1.01,0.05777279734611511
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,4,16,power_law_1.01,0.06117759943008423
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,4,16,power_law_1.01,0.06254720091819763
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,4,16,power_law_1.01,0.06044800281524658
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,4,16,power_law_1.01,0.06220160126686096
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,4,16,power_law_1.01,0.06596480011940002
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,4,16,power_law_1.01,0.06814079880714416
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,4,16,power_law_1.01,0.07223680019378662
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,4,16,power_law_1.01,0.07182719707489013
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,4,16,power_law_1.01,0.07445759773254394
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,4,16,power_law_1.01,0.08424959778785705
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,4,16,power_law_1.01,0.08366720080375671
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,4,16,power_law_1.01,0.09910399913787842
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,4,16,power_law_1.01,0.11016319990158081
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,4,16,power_law_1.01,0.13244800567626952
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,4,16,power_law_1.01,0.1535871982574463
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,4,16,power_law_1.01,0.2034559965133667
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,4,16,power_law_1.01,0.2559679985046387
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,4,16,power_law_1.01,0.33626880645751955
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,4,16,power_law_1.01,0.428115177154541
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,2,1,power_law_1.01,0.051769602298736575
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,2,1,power_law_1.01,0.06376320123672485
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,2,1,power_law_1.01,0.07954559922218322
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,2,1,power_law_1.01,0.11592320203781128
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,2,1,power_law_1.01,0.15136640071868895
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,2,1,power_law_1.01,0.18732160329818726
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,2,1,power_law_1.01,0.23558399677276612
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,2,1,power_law_1.01,0.25039360523223875
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,2,1,power_law_1.01,0.2508543968200684
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,2,1,power_law_1.01,0.26506240367889405
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,2,1,power_law_1.01,0.2718463897705078
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,2,1,power_law_1.01,0.2804863929748535
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,2,1,power_law_1.01,0.2908031940460205
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,2,1,power_law_1.01,0.3012095928192139
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,2,1,power_law_1.01,0.30689918994903564
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,2,1,power_law_1.01,0.3168576002120972
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,2,1,power_law_1.01,0.3397056102752686
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,2,1,power_law_1.01,0.3815232038497925
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,2,1,power_law_1.01,0.402188777923584
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,2,1,power_law_1.01,0.475980806350708
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,2,1,power_law_1.01,0.526643180847168
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,2,1,power_law_1.01,0.6529151916503906
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,2,1,power_law_1.01,0.8005248069763183
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,2,1,power_law_1.01,1.0721983909606934
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,2,1,power_law_1.01,1.3254847526550293
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,2,1,power_law_1.01,1.847609519958496
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,2,1,power_law_1.01,2.3548095703125
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,8,8,power_law_1.2,0.05532159805297852
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,8,8,power_law_1.2,0.06079360246658325
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,8,8,power_law_1.2,0.057196801900863646
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,8,8,power_law_1.2,0.06476799845695495
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,8,8,power_law_1.2,0.07208960056304932
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,8,8,power_law_1.2,0.06945279836654664
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,8,8,power_law_1.2,0.07074559926986694
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,8,8,power_law_1.2,0.07351040244102477
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,8,8,power_law_1.2,0.07196159958839417
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,8,8,power_law_1.2,0.07175679802894593
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,8,8,power_law_1.2,0.07393280267715455
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,8,8,power_law_1.2,0.07332479953765869
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,8,8,power_law_1.2,0.0746944010257721
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,8,8,power_law_1.2,0.07804160118103028
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,8,8,power_law_1.2,0.08401920199394226
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,8,8,power_law_1.2,0.08529919981956482
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,8,8,power_law_1.2,0.09063040018081665
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,8,8,power_law_1.2,0.0981760025024414
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,8,8,power_law_1.2,0.1076159954071045
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,8,8,power_law_1.2,0.1297279953956604
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,8,8,power_law_1.2,0.14766720533370972
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,8,8,power_law_1.2,0.1853119969367981
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,8,8,power_law_1.2,0.224019193649292
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,8,8,power_law_1.2,0.3037888050079346
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,8,8,power_law_1.2,0.3791935920715332
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,8,8,power_law_1.2,0.5349823951721191
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,8,8,power_law_1.2,0.6955135822296142
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,1,128,power_law_1.01,0.04104959964752197
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,1,128,power_law_1.01,0.04165120124816894
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,1,128,power_law_1.01,0.040966400504112245
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,1,128,power_law_1.01,0.04431360065937042
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,1,128,power_law_1.01,0.045100799202919005
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,1,128,power_law_1.01,0.04581120014190674
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,1,128,power_law_1.01,0.046982398629188536
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,1,128,power_law_1.01,0.049292799830436704
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,1,128,power_law_1.01,0.049702399969100954
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,1,128,power_law_1.01,0.050297600030899045
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,1,128,power_law_1.01,0.05360640287399292
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,1,128,power_law_1.01,0.05555199980735779
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,1,128,power_law_1.01,0.051283198595047
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,1,128,power_law_1.01,0.05501440167427063
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,1,128,power_law_1.01,0.060844802856445314
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,1,128,power_law_1.01,0.060710400342941284
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,1,128,power_law_1.01,0.06601600050926208
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,1,128,power_law_1.01,0.07236480116844177
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,1,128,power_law_1.01,0.07920640110969543
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,1,128,power_law_1.01,0.09475200176239014
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,1,128,power_law_1.01,0.11263359785079956
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,1,128,power_law_1.01,0.14471039772033692
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,1,128,power_law_1.01,0.18183679580688478
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,1,128,power_law_1.01,0.2551039934158325
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,1,128,power_law_1.01,0.3185472011566162
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,1,128,power_law_1.01,0.46508159637451174
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,1,128,power_law_1.01,0.6457856178283692
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,4,64,power_law_1.01,0.0564736008644104
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,4,64,power_law_1.01,0.0421887993812561
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,4,64,power_law_1.01,0.0417279988527298
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,4,64,power_law_1.01,0.04332799911499023
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,4,64,power_law_1.01,0.04296959936618805
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,4,64,power_law_1.01,0.04410879909992218
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,4,64,power_law_1.01,0.04479359984397888
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,4,64,power_law_1.01,0.04524799883365631
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,4,64,power_law_1.01,0.045440000295639035
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,4,64,power_law_1.01,0.0459199994802475
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,4,64,power_law_1.01,0.0469760000705719
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,4,64,power_law_1.01,0.04798719882965088
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,4,64,power_law_1.01,0.048467200994491574
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,4,64,power_law_1.01,0.050393599271774295
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,4,64,power_law_1.01,0.05421440005302429
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,4,64,power_law_1.01,0.05485439896583557
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,4,64,power_law_1.01,0.058169597387313844
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,4,64,power_law_1.01,0.0648256003856659
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,4,64,power_law_1.01,0.06762880086898804
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,4,64,power_law_1.01,0.07927039861679078
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,4,64,power_law_1.01,0.0886784017086029
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,4,64,power_law_1.01,0.10952960252761841
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,4,64,power_law_1.01,0.13919999599456787
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,4,64,power_law_1.01,0.19984639883041383
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,4,64,power_law_1.01,0.24476799964904786
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,4,64,power_law_1.01,0.3515903949737549
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,4,64,power_law_1.01,0.43633279800415037
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,8,1,balanced,0.05003733436266581
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,8,1,balanced,0.052186667919158936
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,8,1,balanced,0.054431999723116554
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,8,1,balanced,0.07186666627724965
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,8,1,balanced,0.10342400272687276
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,8,1,balanced,0.13946666320165
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,8,1,balanced,0.1440000037352244
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,8,1,balanced,0.14574933052062988
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,8,1,balanced,0.1460693379243215
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,8,1,balanced,0.1471733351548513
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,8,1,balanced,0.1516853372255961
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,8,1,balanced,0.15621333320935568
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,8,1,balanced,0.15757866700490317
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,8,1,balanced,0.16546666622161865
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,8,1,balanced,0.16873600085576376
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,8,1,balanced,0.17756799856821695
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,8,1,balanced,0.18826133012771606
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,8,1,balanced,0.21693867444992065
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,8,1,balanced,0.23545066515604654
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,8,1,balanced,0.28749332825342816
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,8,1,balanced,0.3498773177464803
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,8,1,balanced,0.45110400517781574
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,8,1,balanced,0.5475519895553589
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,8,1,balanced,0.791045347849528
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,8,1,balanced,0.9896426995595297
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,8,1,balanced,1.4439627329508464
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,8,1,balanced,1.8625973065694172
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,1,128,balanced,0.0458133320013682
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,1,128,balanced,0.04595733185609182
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,1,128,balanced,0.0459146648645401
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,1,128,balanced,0.04755199948946635
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,1,128,balanced,0.05000533163547516
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,1,128,balanced,0.05416533350944519
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,1,128,balanced,0.056346664826075234
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,1,128,balanced,0.05406400064627329
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,1,128,balanced,0.05410666763782501
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,1,128,balanced,0.05473599831263224
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,1,128,balanced,0.05629866818586985
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,1,128,balanced,0.060346667965253196
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,1,128,balanced,0.05788266658782959
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,1,128,balanced,0.05829333265622457
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,1,128,balanced,0.06203199923038483
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,1,128,balanced,0.062224000692367554
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,1,128,balanced,0.06564799944559734
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,1,128,balanced,0.07042133311430614
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,1,128,balanced,0.07538133362929027
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,1,128,balanced,0.08072000245253245
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,1,128,balanced,0.09115733702977498
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,1,128,balanced,0.10552533467610677
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,1,128,balanced,0.1272586683432261
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,1,128,balanced,0.183514674504598
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,1,128,balanced,0.22560532887776694
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,1,128,balanced,0.31065066655476886
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,1,128,balanced,0.3965333302815755
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,2,4,balanced,0.07264000177383423
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,2,4,balanced,0.07439466814200084
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,2,4,balanced,0.07469866673151652
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,2,4,balanced,0.0919040044148763
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,2,4,balanced,0.1092639962832133
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,2,4,balanced,0.14380799730618796
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,2,4,balanced,0.14632532993952432
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,2,4,balanced,0.14849066734313965
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,2,4,balanced,0.14611732959747314
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,2,4,balanced,0.1493760049343109
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,2,4,balanced,0.14813866217931113
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,2,4,balanced,0.15158399939537048
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,2,4,balanced,0.15341867009798685
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,2,4,balanced,0.1543359955151876
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,2,4,balanced,0.1569813291231791
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,2,4,balanced,0.15802133083343506
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,2,4,balanced,0.16269333163897196
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,2,4,balanced,0.17037334044774374
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,2,4,balanced,0.1800640026728312
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,2,4,balanced,0.2015893260637919
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,2,4,balanced,0.21765865882237753
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,2,4,balanced,0.25434666872024536
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,2,4,balanced,0.29124265909194946
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,2,4,balanced,0.4143413305282593
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,2,4,balanced,0.4986079931259155
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,2,4,balanced,0.7078986962636312
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,2,4,balanced,0.9231359958648682
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,4,4,balanced,0.01931200052301089
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,4,4,balanced,0.01907733331123988
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,4,4,balanced,0.021013334393501282
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,4,4,balanced,0.02310933421055476
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,4,4,balanced,0.027119999130566914
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,4,4,balanced,0.04148799926042557
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,4,4,balanced,0.04401599864164988
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,4,4,balanced,0.04458666841189066
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,4,4,balanced,0.045653333266576133
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,4,4,balanced,0.04774933556715647
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,4,4,balanced,0.049925332268079124
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,4,4,balanced,0.05387733379999796
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,4,4,balanced,0.05474133292833964
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,4,4,balanced,0.056517332792282104
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,4,4,balanced,0.07145066559314728
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,4,4,balanced,0.07114666700363159
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,4,4,balanced,0.07293866574764252
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,4,4,balanced,0.08711466193199158
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,4,4,balanced,0.09588266412417094
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,4,4,balanced,0.11864533027013142
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,4,4,balanced,0.12362666924794515
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,4,4,balanced,0.13924266894658408
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,4,4,balanced,0.15447466572125754
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,4,4,balanced,0.17411200205485025
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,4,4,balanced,0.19477866093317667
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,4,4,balanced,0.3155200084050496
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,4,4,balanced,0.35685332616170246
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,4,4,power_law_1.2,0.06170240044593811
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,4,4,power_law_1.2,0.0675711989402771
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,4,4,power_law_1.2,0.07544959783554077
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,4,4,power_law_1.2,0.08559359908103943
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,4,4,power_law_1.2,0.0925055980682373
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,4,4,power_law_1.2,0.09943680167198181
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,4,4,power_law_1.2,0.1068992018699646
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,4,4,power_law_1.2,0.10515199899673462
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,4,4,power_law_1.2,0.10915839672088623
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,4,4,power_law_1.2,0.11016319990158081
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,4,4,power_law_1.2,0.11239039897918701
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,4,4,power_law_1.2,0.11368319988250733
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,4,4,power_law_1.2,0.11559040546417236
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,4,4,power_law_1.2,0.11969280242919922
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,4,4,power_law_1.2,0.12138880491256714
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,4,4,power_law_1.2,0.12303359508514404
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,4,4,power_law_1.2,0.12804479598999025
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,4,4,power_law_1.2,0.1397503972053528
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,4,4,power_law_1.2,0.15635199546813966
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,4,4,power_law_1.2,0.18894720077514648
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,4,4,power_law_1.2,0.20869119167327882
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,4,4,power_law_1.2,0.2577663898468018
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,4,4,power_law_1.2,0.32065279483795167
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,4,4,power_law_1.2,0.4769279956817627
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,4,4,power_law_1.2,0.5998144149780273
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,4,4,power_law_1.2,0.9056320190429688
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,4,4,power_law_1.2,1.1683648109436036
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,4,1,power_law_1.01,0.09111679792404175
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,4,1,power_law_1.01,0.09901440143585205
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,4,1,power_law_1.01,0.11438080072402954
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,4,1,power_law_1.01,0.1552191972732544
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,4,1,power_law_1.01,0.17918720245361328
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,4,1,power_law_1.01,0.20683519840240477
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,4,1,power_law_1.01,0.2567231893539429
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,4,1,power_law_1.01,0.25999999046325684
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,4,1,power_law_1.01,0.2619647979736328
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,4,1,power_law_1.01,0.268339204788208
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,4,1,power_law_1.01,0.2722176074981689
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,4,1,power_law_1.01,0.2787391901016235
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,4,1,power_law_1.01,0.28853759765625
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,4,1,power_law_1.01,0.29509758949279785
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,4,1,power_law_1.01,0.3050175905227661
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,4,1,power_law_1.01,0.31296639442443847
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,4,1,power_law_1.01,0.3243904113769531
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,4,1,power_law_1.01,0.3575551986694336
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,4,1,power_law_1.01,0.3949055910110474
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,4,1,power_law_1.01,0.4767104148864746
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,4,1,power_law_1.01,0.5527488231658936
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,4,1,power_law_1.01,0.7008575916290283
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,4,1,power_law_1.01,0.8503359794616699
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,4,1,power_law_1.01,1.1460736274719239
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,4,1,power_law_1.01,1.4377087593078612
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,4,1,power_law_1.01,2.014790344238281
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,4,1,power_law_1.01,2.5879999160766602
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,4,4,power_law_1.01,0.018246400356292724
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,4,4,power_law_1.01,0.020095999538898467
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,4,4,power_law_1.01,0.021459199488162994
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,4,4,power_law_1.01,0.025939199328422546
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,4,4,power_law_1.01,0.026867198944091796
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,4,4,power_law_1.01,0.03123840093612671
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,4,4,power_law_1.01,0.03306879997253418
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,4,4,power_law_1.01,0.03594239950180054
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,4,4,power_law_1.01,0.03790720105171204
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,4,4,power_law_1.01,0.03777920007705689
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,4,4,power_law_1.01,0.03969280123710632
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,4,4,power_law_1.01,0.04387840032577515
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,4,4,power_law_1.01,0.04200319945812225
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,4,4,power_law_1.01,0.04425599873065948
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,4,4,power_law_1.01,0.055283200740814206
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,4,4,power_law_1.01,0.052211201190948485
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,4,4,power_law_1.01,0.05008000135421753
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,4,4,power_law_1.01,0.07429760098457336
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,4,4,power_law_1.01,0.08044800162315369
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,4,4,power_law_1.01,0.0927232027053833
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,4,4,power_law_1.01,0.09294080138206481
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,4,4,power_law_1.01,0.1023360013961792
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,4,4,power_law_1.01,0.12172800302505493
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,4,4,power_law_1.01,0.13838720321655273
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,4,4,power_law_1.01,0.1680575966835022
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,4,4,power_law_1.01,0.23149440288543702
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,4,4,power_law_1.01,0.29360640048980713
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,8,8,power_law_1.01,0.044819200038909913
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,8,8,power_law_1.01,0.048044800758361816
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,8,8,power_law_1.01,0.047577598690986635
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,8,8,power_law_1.01,0.05382400155067444
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,8,8,power_law_1.01,0.05392000079154968
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,8,8,power_law_1.01,0.05985280275344849
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,8,8,power_law_1.01,0.05978879928588867
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,8,8,power_law_1.01,0.05772799849510193
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,8,8,power_law_1.01,0.06112639904022217
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,8,8,power_law_1.01,0.06348159909248352
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,8,8,power_law_1.01,0.06162559986114502
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,8,8,power_law_1.01,0.06654719710350036
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,8,8,power_law_1.01,0.06595839858055115
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,8,8,power_law_1.01,0.07945600152015686
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,8,8,power_law_1.01,0.08528640270233154
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,8,8,power_law_1.01,0.20363519191741944
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,8,8,power_law_1.01,0.0791808009147644
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,8,8,power_law_1.01,0.09308159947395325
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,8,8,power_law_1.01,0.09404159784317016
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,8,8,power_law_1.01,0.10537600517272949
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,8,8,power_law_1.01,0.12227200269699097
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,8,8,power_law_1.01,0.14636800289154053
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,8,8,power_law_1.01,0.1789888024330139
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,8,8,power_law_1.01,0.2302720069885254
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,8,8,power_law_1.01,0.2919680118560791
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,8,8,power_law_1.01,0.41294078826904296
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,8,8,power_law_1.01,0.5096384048461914
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,2,16,power_law_1.01,0.05384320020675659
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,2,16,power_law_1.01,0.06934400200843811
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,2,16,power_law_1.01,0.06614400148391723
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,2,16,power_law_1.01,0.06986879706382751
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,2,16,power_law_1.01,0.07175679802894593
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,2,16,power_law_1.01,0.06934400200843811
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,2,16,power_law_1.01,0.06910719871520996
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,2,16,power_law_1.01,0.07314559817314148
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,2,16,power_law_1.01,0.07097600102424621
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,2,16,power_law_1.01,0.07328640222549439
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,2,16,power_law_1.01,0.07426559925079346
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,2,16,power_law_1.01,0.07530879974365234
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,2,16,power_law_1.01,0.0750976026058197
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,2,16,power_law_1.01,0.07685120105743408
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,2,16,power_law_1.01,0.08300160169601441
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,2,16,power_law_1.01,0.08373119831085205
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,2,16,power_law_1.01,0.08568320274353028
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,2,16,power_law_1.01,0.09464319944381713
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,2,16,power_law_1.01,0.10293760299682617
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,2,16,power_law_1.01,0.12340480089187622
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,2,16,power_law_1.01,0.13980159759521485
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,2,16,power_law_1.01,0.17567360401153564
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,2,16,power_law_1.01,0.21492478847503663
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,2,16,power_law_1.01,0.2846848011016846
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,2,16,power_law_1.01,0.36058878898620605
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,2,16,power_law_1.01,0.5197696208953857
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,2,16,power_law_1.01,0.7027328014373779
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,2,128,power_law_1.01,0.04529280066490173
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,2,128,power_law_1.01,0.04447360038757324
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,2,128,power_law_1.01,0.04490239918231964
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,2,128,power_law_1.01,0.046323201060295104
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,2,128,power_law_1.01,0.046982398629188536
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,2,128,power_law_1.01,0.047712001204490664
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,2,128,power_law_1.01,0.048467200994491574
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,2,128,power_law_1.01,0.04916479885578155
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,2,128,power_law_1.01,0.04913919866085052
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,2,128,power_law_1.01,0.04937599897384644
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,2,128,power_law_1.01,0.050432002544403075
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,2,128,power_law_1.01,0.050809597969055174
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,2,128,power_law_1.01,0.051622402667999265
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,2,128,power_law_1.01,0.053350400924682614
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,2,128,power_law_1.01,0.056377601623535153
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,2,128,power_law_1.01,0.058355200290679934
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,2,128,power_law_1.01,0.06030719876289368
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,2,128,power_law_1.01,0.0657920002937317
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,2,128,power_law_1.01,0.07050880193710327
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,2,128,power_law_1.01,0.08156800270080566
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,2,128,power_law_1.01,0.09086719751358033
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,2,128,power_law_1.01,0.11755520105361938
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,2,128,power_law_1.01,0.13950079679489136
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,2,128,power_law_1.01,0.19886720180511475
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,2,128,power_law_1.01,0.24456319808959961
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,2,128,power_law_1.01,0.357856011390686
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,2,128,power_law_1.01,0.4580927848815918
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,1,256,power_law_1.2,0.018041600286960603
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,1,256,power_law_1.2,0.017977599799633027
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,1,256,power_law_1.2,0.018662400543689728
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,1,256,power_law_1.2,0.0208639994263649
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,1,256,power_law_1.2,0.03475840091705322
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,1,256,power_law_1.2,0.029811200499534608
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,1,256,power_law_1.2,0.029862400889396668
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,1,256,power_law_1.2,0.02995840013027191
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,1,256,power_law_1.2,0.029651200771331786
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,1,256,power_law_1.2,0.030092799663543703
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,1,256,power_law_1.2,0.030476799607276915
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,1,256,power_law_1.2,0.03061760067939758
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,1,256,power_law_1.2,0.02948479950428009
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,1,256,power_law_1.2,0.030291199684143066
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,1,256,power_law_1.2,0.03041279911994934
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,1,256,power_law_1.2,0.031916800141334536
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,1,256,power_law_1.2,0.03258239924907684
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,1,256,power_law_1.2,0.036499199271202085
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,1,256,power_law_1.2,0.03744640052318573
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,1,256,power_law_1.2,0.04071680009365082
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,1,256,power_law_1.2,0.04611839950084686
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,1,256,power_law_1.2,0.05734400153160095
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,1,256,power_law_1.2,0.07408639788627625
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,1,256,power_law_1.2,0.09722880125045777
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,1,256,power_law_1.2,0.12149759531021118
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,1,256,power_law_1.2,0.17800960540771485
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,1,256,power_law_1.2,0.22574079036712646
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,1,4,power_law_1.01,0.05858560204505921
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,1,4,power_law_1.01,0.0753216028213501
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,1,4,power_law_1.01,0.0821951985359192
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,1,4,power_law_1.01,0.10055040121078491
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,1,4,power_law_1.01,0.12227200269699097
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,1,4,power_law_1.01,0.12855679988861085
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,1,4,power_law_1.01,0.16082559823989867
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,1,4,power_law_1.01,0.15802240371704102
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,1,4,power_law_1.01,0.14252159595489503
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,1,4,power_law_1.01,0.16264959573745727
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,1,4,power_law_1.01,0.1608896017074585
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,1,4,power_law_1.01,0.16522879600524903
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,1,4,power_law_1.01,0.16332160234451293
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,1,4,power_law_1.01,0.1715008020401001
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,1,4,power_law_1.01,0.18039040565490722
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,1,4,power_law_1.01,0.18231680393218994
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,1,4,power_law_1.01,0.18963199853897095
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,1,4,power_law_1.01,0.21987199783325195
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,1,4,power_law_1.01,0.2275775909423828
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,1,4,power_law_1.01,0.25505919456481935
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,1,4,power_law_1.01,0.284934401512146
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,1,4,power_law_1.01,0.3464384078979492
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,1,4,power_law_1.01,0.3974656105041504
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,1,4,power_law_1.01,0.5177855968475342
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,1,4,power_law_1.01,0.659276819229126
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,1,4,power_law_1.01,0.8989503860473633
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,1,4,power_law_1.01,1.1298687934875489
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,4,64,power_law_1.2,0.01744000017642975
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,4,64,power_law_1.2,0.016748799383640288
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,4,64,power_law_1.2,0.017472000420093538
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,4,64,power_law_1.2,0.01823360025882721
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,4,64,power_law_1.2,0.018566399812698364
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,4,64,power_law_1.2,0.020979200303554536
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,4,64,power_law_1.2,0.03529599905014038
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,4,64,power_law_1.2,0.034995201230049136
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,4,64,power_law_1.2,0.02873600125312805
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,4,64,power_law_1.2,0.029151999950408937
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,4,64,power_law_1.2,0.029311999678611755
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,4,64,power_law_1.2,0.02980479896068573
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,4,64,power_law_1.2,0.029414400458335876
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,4,64,power_law_1.2,0.029791998863220214
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,4,64,power_law_1.2,0.029363200068473816
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,4,64,power_law_1.2,0.030854400992393494
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,4,64,power_law_1.2,0.032339200377464294
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,4,64,power_law_1.2,0.03437440097332001
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,4,64,power_law_1.2,0.037299200892448425
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,4,64,power_law_1.2,0.041228801012039185
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,4,64,power_law_1.2,0.04500479996204376
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,4,64,power_law_1.2,0.05991680026054382
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,4,64,power_law_1.2,0.07285760045051574
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,4,64,power_law_1.2,0.09501439929008484
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,4,64,power_law_1.2,0.11787519454956055
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,4,64,power_law_1.2,0.16305279731750488
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,4,64,power_law_1.2,0.2094912052154541
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,1,256,balanced,0.0455626646677653
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,1,256,balanced,0.04596266647179922
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,1,256,balanced,0.04584533472855886
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,1,256,balanced,0.04780266682306925
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,1,256,balanced,0.04994666576385498
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,1,256,balanced,0.04975466430187225
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,1,256,balanced,0.0516480008761088
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,1,256,balanced,0.051925331354141235
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,1,256,balanced,0.05234666665395101
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,1,256,balanced,0.05189866820971171
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,1,256,balanced,0.05413866539796194
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,1,256,balanced,0.05416533350944519
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,1,256,balanced,0.05422399938106537
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,1,256,balanced,0.056143999099731445
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,1,256,balanced,0.059978668888409935
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,1,256,balanced,0.05935466786225637
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,1,256,balanced,0.0621919979651769
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,1,256,balanced,0.06821866830190022
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,1,256,balanced,0.07045333087444305
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,1,256,balanced,0.07941333452860515
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,1,256,balanced,0.08673066894213359
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,1,256,balanced,0.10314133763313293
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,1,256,balanced,0.12036266922950745
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,1,256,balanced,0.17674134174982706
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,1,256,balanced,0.2212053338686625
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,1,256,balanced,0.30430932839711505
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,1,256,balanced,0.38178133964538574
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,1,16,balanced,0.04171733558177948
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,1,16,balanced,0.041690667470296226
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,1,16,balanced,0.04383466641108195
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,1,16,balanced,0.04555733501911163
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,1,16,balanced,0.062224000692367554
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,1,16,balanced,0.08065066734949748
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,1,16,balanced,0.08232533435026805
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,1,16,balanced,0.08265600105126698
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,1,16,balanced,0.08330666522185008
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,1,16,balanced,0.08288000027338664
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,1,16,balanced,0.08499733606974284
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,1,16,balanced,0.0848479966322581
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,1,16,balanced,0.08500267068545024
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,1,16,balanced,0.08842133482297261
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,1,16,balanced,0.09152533610661824
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,1,16,balanced,0.09342933694521587
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,1,16,balanced,0.09650133053461711
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,1,16,balanced,0.10308266679445903
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,1,16,balanced,0.10699733098347981
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,1,16,balanced,0.11917333801587422
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,1,16,balanced,0.1341919998327891
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,1,16,balanced,0.1602133313814799
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,1,16,balanced,0.19249600172042847
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,1,16,balanced,0.24705066283543906
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,1,16,balanced,0.2858826716740926
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,1,16,balanced,0.39636798699696857
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,1,16,balanced,0.4962400197982788
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,8,16,power_law_1.2,0.043321600556373595
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,8,16,power_law_1.2,0.04678399860858917
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,8,16,power_law_1.2,0.04537599980831146
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,8,16,power_law_1.2,0.04668160080909729
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,8,16,power_law_1.2,0.045638400316238406
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,8,16,power_law_1.2,0.0447488009929657
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,8,16,power_law_1.2,0.04557439982891083
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,8,16,power_law_1.2,0.04737919867038727
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,8,16,power_law_1.2,0.04764800071716309
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,8,16,power_law_1.2,0.04739840030670166
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,8,16,power_law_1.2,0.051520001888275144
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,8,16,power_law_1.2,0.05377280116081238
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,8,16,power_law_1.2,0.05422719717025757
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,8,16,power_law_1.2,0.05506560206413269
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,8,16,power_law_1.2,0.06064640283584595
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,8,16,power_law_1.2,0.0623744010925293
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,8,16,power_law_1.2,0.06839039921760559
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,8,16,power_law_1.2,0.07551360130310059
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,8,16,power_law_1.2,0.07683839797973632
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,8,16,power_law_1.2,0.09070079922676086
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,8,16,power_law_1.2,0.10481280088424683
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,8,16,power_law_1.2,0.1348736047744751
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,8,16,power_law_1.2,0.15366400480270387
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,8,16,power_law_1.2,0.21063680648803712
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,8,16,power_law_1.2,0.26640000343322756
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,8,16,power_law_1.2,0.3996031999588013
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,8,16,power_law_1.2,0.5015168190002441
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,2,4,power_law_1.2,0.04549759924411774
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,2,4,power_law_1.2,0.057126402854919434
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,2,4,power_law_1.2,0.05790079832077026
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,2,4,power_law_1.2,0.07760639786720276
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,2,4,power_law_1.2,0.08087679743766785
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,2,4,power_law_1.2,0.08849920034408569
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,2,4,power_law_1.2,0.10090240240097045
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,2,4,power_law_1.2,0.10014079809188843
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,2,4,power_law_1.2,0.11043839454650879
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,2,4,power_law_1.2,0.10645760297775268
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,2,4,power_law_1.2,0.10817279815673828
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,2,4,power_law_1.2,0.11188479661941528
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,2,4,power_law_1.2,0.11389440298080444
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,2,4,power_law_1.2,0.12020479440689087
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,2,4,power_law_1.2,0.12585599422454835
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,2,4,power_law_1.2,0.1318719983100891
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,2,4,power_law_1.2,0.13905919790267945
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,2,4,power_law_1.2,0.15674879550933837
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,2,4,power_law_1.2,0.1646463990211487
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,2,4,power_law_1.2,0.2009984016418457
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,2,4,power_law_1.2,0.2292799949645996
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,2,4,power_law_1.2,0.2961280107498169
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,2,4,power_law_1.2,0.3323967933654785
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,2,4,power_law_1.2,0.4555520057678223
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,2,4,power_law_1.2,0.579366397857666
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,2,4,power_law_1.2,0.8129088401794433
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,2,4,power_law_1.2,1.0694080352783204
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,16,4,power_law_1.2,0.041171199083328246
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,16,4,power_law_1.2,0.04192639887332916
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,16,4,power_law_1.2,0.041407999396324155
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,16,4,power_law_1.2,0.046758401393890384
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,16,4,power_law_1.2,0.05002239942550659
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,16,4,power_law_1.2,0.05478399991989136
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,16,4,power_law_1.2,0.05711359977722168
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,16,4,power_law_1.2,0.057657599449157715
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,16,4,power_law_1.2,0.058310401439666745
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,16,4,power_law_1.2,0.05759360194206238
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,16,4,power_law_1.2,0.061791998147964475
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,16,4,power_law_1.2,0.06424959897994995
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,16,4,power_law_1.2,0.06591359972953796
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,16,4,power_law_1.2,0.070169597864151
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,16,4,power_law_1.2,0.07906559705734253
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,16,4,power_law_1.2,0.081632000207901
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,16,4,power_law_1.2,0.08112639784812928
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,16,4,power_law_1.2,0.09670400023460388
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,16,4,power_law_1.2,0.10647679567337036
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,16,4,power_law_1.2,0.1312000036239624
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,16,4,power_law_1.2,0.16320639848709106
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,16,4,power_law_1.2,0.20502400398254395
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,16,4,power_law_1.2,0.25058560371398925
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,16,4,power_law_1.2,0.34926719665527345
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,16,4,power_law_1.2,0.45735678672790525
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,16,4,power_law_1.2,0.6677055835723877
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,16,4,power_law_1.2,0.8079936027526855
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,1,2,power_law_1.01,0.08718079924583436
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,1,2,power_law_1.01,0.11326080560684204
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,1,2,power_law_1.01,0.14206080436706542
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,1,2,power_law_1.01,0.17732479572296142
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,1,2,power_law_1.01,0.217305588722229
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,1,2,power_law_1.01,0.25402240753173827
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,1,2,power_law_1.01,0.3116352081298828
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,1,2,power_law_1.01,0.2980992078781128
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,1,2,power_law_1.01,0.3122368097305298
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,1,2,power_law_1.01,0.3439487934112549
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,1,2,power_law_1.01,0.33434879779815674
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,1,2,power_law_1.01,0.32565760612487793
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,1,2,power_law_1.01,0.3371903896331787
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,1,2,power_law_1.01,0.35759360790252687
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,1,2,power_law_1.01,0.3516096115112305
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,1,2,power_law_1.01,0.35187199115753176
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,1,2,power_law_1.01,0.3754175901412964
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,1,2,power_law_1.01,0.3945919990539551
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,1,2,power_law_1.01,0.42951040267944335
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,1,2,power_law_1.01,0.4785344123840332
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,1,2,power_law_1.01,0.539353609085083
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,1,2,power_law_1.01,0.6543168067932129
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,1,2,power_law_1.01,0.7617728233337402
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,1,2,power_law_1.01,1.0059967994689942
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,1,2,power_law_1.01,1.1902400016784669
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,1,2,power_law_1.01,1.7399936676025392
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,1,2,power_law_1.01,2.062015914916992
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,2,1,power_law_1.01,0.06279039978981019
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,2,1,power_law_1.01,0.0785535991191864
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,2,1,power_law_1.01,0.10675200223922729
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,2,1,power_law_1.01,0.1705407977104187
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,2,1,power_law_1.01,0.22868480682373046
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,2,1,power_law_1.01,0.2937279939651489
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,2,1,power_law_1.01,0.4115903854370117
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,2,1,power_law_1.01,0.4158783912658691
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,2,1,power_law_1.01,0.4322368144989014
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,2,1,power_law_1.01,0.4508800029754639
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,2,1,power_law_1.01,0.449183988571167
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,2,1,power_law_1.01,0.47542400360107423
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,2,1,power_law_1.01,0.48005118370056155
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,2,1,power_law_1.01,0.5011583805084229
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,2,1,power_law_1.01,0.5014400005340576
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,2,1,power_law_1.01,0.5147456169128418
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,2,1,power_law_1.01,0.543398380279541
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,2,1,power_law_1.01,0.5940351963043213
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,2,1,power_law_1.01,0.6044735908508301
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,2,1,power_law_1.01,0.6856319904327393
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,2,1,power_law_1.01,0.7134016036987305
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,2,1,power_law_1.01,0.8094464302062988
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,2,1,power_law_1.01,0.931884765625
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,2,1,power_law_1.01,1.1756159782409668
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,2,1,power_law_1.01,1.422873592376709
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,2,1,power_law_1.01,1.9124223709106445
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,2,1,power_law_1.01,2.4268543243408205
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,2,64,balanced,0.05243733525276184
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,2,64,balanced,0.04397333165009817
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,2,64,balanced,0.043882668018341064
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,2,64,balanced,0.04610666632652283
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,2,64,balanced,0.04970133304595947
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,2,64,balanced,0.04781866570313772
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,2,64,balanced,0.04970133304595947
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,2,64,balanced,0.04987733562787374
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,2,64,balanced,0.049829334020614624
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,2,64,balanced,0.04995200037956238
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,2,64,balanced,0.05193066596984863
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,2,64,balanced,0.05199466645717621
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,2,64,balanced,0.05192000170548757
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,2,64,balanced,0.05401599903901418
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,2,64,balanced,0.05613866448402405
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,2,64,balanced,0.05772800246874491
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,2,64,balanced,0.05991999804973602
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,2,64,balanced,0.06433066725730896
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,2,64,balanced,0.06648533542950948
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,2,64,balanced,0.07513600091139476
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,2,64,balanced,0.08272533118724823
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,2,64,balanced,0.09733333190282185
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,2,64,balanced,0.11441600322723389
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,2,64,balanced,0.15286399920781454
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,2,64,balanced,0.1822506586710612
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,2,64,balanced,0.24860266844431558
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,2,64,balanced,0.3081226746241252
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,1,8,power_law_1.01,0.052211201190948485
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,1,8,power_law_1.01,0.06478719711303711
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,1,8,power_law_1.01,0.07208319902420043
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,1,8,power_law_1.01,0.08067839741706848
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,1,8,power_law_1.01,0.08039680123329163
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,1,8,power_law_1.01,0.07771520018577575
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,1,8,power_law_1.01,0.0869376003742218
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,1,8,power_law_1.01,0.08988159894943237
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,1,8,power_law_1.01,0.08672000169754028
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,1,8,power_law_1.01,0.09025279879570007
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,1,8,power_law_1.01,0.0927616000175476
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,1,8,power_law_1.01,0.09208959937095643
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,1,8,power_law_1.01,0.09523199796676636
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,1,8,power_law_1.01,0.09678080081939697
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,1,8,power_law_1.01,0.10456960201263428
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,1,8,power_law_1.01,0.10631040334701539
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,1,8,power_law_1.01,0.10930559635162354
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,1,8,power_law_1.01,0.12320640087127685
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,1,8,power_law_1.01,0.13192960023880004
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,1,8,power_law_1.01,0.15397119522094727
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,1,8,power_law_1.01,0.17224960327148436
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,1,8,power_law_1.01,0.20764799118041993
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,1,8,power_law_1.01,0.2503232002258301
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,1,8,power_law_1.01,0.32600960731506345
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,1,8,power_law_1.01,0.4152512073516846
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,1,8,power_law_1.01,0.5941823959350586
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,1,8,power_law_1.01,0.7557248115539551
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,2,32,balanced,0.02128533273935318
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,2,32,balanced,0.021162666380405426
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,2,32,balanced,0.020992000897725422
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,2,32,balanced,0.021104000508785248
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,2,32,balanced,0.022570667167504627
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,2,32,balanced,0.023573334018389385
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,2,32,balanced,0.027301333844661713
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,2,32,balanced,0.025786665578683216
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,2,32,balanced,0.03160533308982849
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,2,32,balanced,0.031445334355036415
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,2,32,balanced,0.03150933235883713
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,2,32,balanced,0.036559998989105225
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,2,32,balanced,0.0354666660229365
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,2,32,balanced,0.04130133241415024
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,2,32,balanced,0.050160000721613564
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,2,32,balanced,0.0499893327554067
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,2,32,balanced,0.0521919975678126
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,2,32,balanced,0.06771733363469441
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,2,32,balanced,0.07275733351707458
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,2,32,balanced,0.10478400190671285
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,2,32,balanced,0.0777759999036789
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,2,32,balanced,0.0940053363641103
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,2,32,balanced,0.11447466413180034
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,2,32,balanced,0.15237866838773093
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,2,32,balanced,0.1872373421986898
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,2,32,balanced,0.2720426718393962
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,2,32,balanced,0.34772801399230957
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,4,4,power_law_1.01,0.0530239999294281
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,4,4,power_law_1.01,0.05368959903717041
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,4,4,power_law_1.01,0.05724160075187683
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,4,4,power_law_1.01,0.06693120002746582
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,4,4,power_law_1.01,0.07096959948539734
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,4,4,power_law_1.01,0.07327359914779663
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,4,4,power_law_1.01,0.08586239814758301
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,4,4,power_law_1.01,0.07964800000190735
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,4,4,power_law_1.01,0.08252800107002259
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,4,4,power_law_1.01,0.08297600150108338
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,4,4,power_law_1.01,0.08862079977989197
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,4,4,power_law_1.01,0.08860160112380981
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,4,4,power_law_1.01,0.09123200178146362
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,4,4,power_law_1.01,0.09146239757537841
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,4,4,power_law_1.01,0.10179200172424316
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,4,4,power_law_1.01,0.10574719905853272
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,4,4,power_law_1.01,0.10876799821853637
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,4,4,power_law_1.01,0.12020479440689087
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,4,4,power_law_1.01,0.13292800188064574
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,4,4,power_law_1.01,0.15993599891662597
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,4,4,power_law_1.01,0.18142080307006836
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,4,4,power_law_1.01,0.23086719512939452
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,4,4,power_law_1.01,0.27070720195770265
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,4,4,power_law_1.01,0.36751360893249513
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,4,4,power_law_1.01,0.46826882362365724
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,4,4,power_law_1.01,0.6866432189941406
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,4,4,power_law_1.01,0.8950400352478027
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,4,16,power_law_1.2,0.04574080109596253
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,4,16,power_law_1.2,0.05688959956169128
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,4,16,power_law_1.2,0.05118079781532288
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,4,16,power_law_1.2,0.05807999968528747
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,4,16,power_law_1.2,0.05804799795150757
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,4,16,power_law_1.2,0.05614079833030701
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,4,16,power_law_1.2,0.05482239723205566
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,4,16,power_law_1.2,0.056390398740768434
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,4,16,power_law_1.2,0.05842559933662415
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,4,16,power_law_1.2,0.06174719929695129
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,4,16,power_law_1.2,0.06219519972801209
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,4,16,power_law_1.2,0.06336640119552613
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,4,16,power_law_1.2,0.06444799900054932
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,4,16,power_law_1.2,0.06849279999732971
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,4,16,power_law_1.2,0.07381119728088378
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,4,16,power_law_1.2,0.07494400143623352
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,4,16,power_law_1.2,0.07497599720954895
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,4,16,power_law_1.2,0.08574720025062561
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,4,16,power_law_1.2,0.08946560025215149
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,4,16,power_law_1.2,0.10169600248336792
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,4,16,power_law_1.2,0.11667840480804444
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,4,16,power_law_1.2,0.14104959964752198
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,4,16,power_law_1.2,0.1527168035507202
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,4,16,power_law_1.2,0.22648320198059083
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,4,16,power_law_1.2,0.2878528118133545
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,4,16,power_law_1.2,0.38382079601287844
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,4,16,power_law_1.2,0.4884928226470947
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,8,8,balanced,0.0377813329299291
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,8,8,balanced,0.035786665976047516
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,8,8,balanced,0.037248000502586365
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,8,8,balanced,0.04002666721741358
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,8,8,balanced,0.04539200166861216
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,8,8,balanced,0.06237333516279856
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,8,8,balanced,0.06053866446018219
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,8,8,balanced,0.06498666604359944
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,8,8,balanced,0.06538666784763336
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,8,8,balanced,0.06638399759928386
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,8,8,balanced,0.06508266429106395
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,8,8,balanced,0.0673226664463679
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,8,8,balanced,0.06773333251476288
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,8,8,balanced,0.07064533233642578
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,8,8,balanced,0.07495466868082683
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,8,8,balanced,0.07707733412583669
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,8,8,balanced,0.07696000238259633
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,8,8,balanced,0.0844586690266927
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,8,8,balanced,0.08734933535257976
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,8,8,balanced,0.10152533650398254
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,8,8,balanced,0.10743467013041179
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,8,8,balanced,0.13570666313171387
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,8,8,balanced,0.15693333745002747
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,8,8,balanced,0.21066133181254068
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,8,8,balanced,0.2483839988708496
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,8,8,balanced,0.33585067590077716
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,8,8,balanced,0.4274933338165283
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,1,16,balanced,0.048058668772379555
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,1,16,balanced,0.04913066824277242
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,1,16,balanced,0.05539733171463013
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,1,16,balanced,0.06420266628265381
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,1,16,balanced,0.08052800099054973
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,1,16,balanced,0.10964799920717876
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,1,16,balanced,0.10573866963386536
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,1,16,balanced,0.10402666529019673
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,1,16,balanced,0.10114133358001709
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,1,16,balanced,0.10116799672444661
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,1,16,balanced,0.09935466448465984
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,1,16,balanced,0.09704533219337463
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,1,16,balanced,0.09418132901191711
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,1,16,balanced,0.09771733482678731
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,1,16,balanced,0.10095999638239543
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,1,16,balanced,0.09948266545931499
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,1,16,balanced,0.1055626670519511
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,1,16,balanced,0.10587199529012044
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,1,16,balanced,0.11910933256149292
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,1,16,balanced,0.12016000350316365
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,1,16,balanced,0.13382400075594583
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,1,16,balanced,0.15017599860827127
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,1,16,balanced,0.17229866981506348
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,1,16,balanced,0.24393065770467123
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,1,16,balanced,0.28086400032043457
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,1,16,balanced,0.3899093468983968
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,1,16,balanced,0.4888266722361247
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,1,64,power_law_1.01,0.05649920105934143
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,1,64,power_law_1.01,0.04479359984397888
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,1,64,power_law_1.01,0.04376319944858551
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,1,64,power_law_1.01,0.047775998711586
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,1,64,power_law_1.01,0.045433598756790164
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,1,64,power_law_1.01,0.04547840058803558
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,1,64,power_law_1.01,0.046137601137161255
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,1,64,power_law_1.01,0.04737280011177063
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,1,64,power_law_1.01,0.04747520089149475
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,1,64,power_law_1.01,0.04757120013237
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,1,64,power_law_1.01,0.04883840084075928
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,1,64,power_law_1.01,0.05139200091361999
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,1,64,power_law_1.01,0.05271040201187134
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,1,64,power_law_1.01,0.055904000997543335
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,1,64,power_law_1.01,0.06095359921455383
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,1,64,power_law_1.01,0.0629696011543274
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,1,64,power_law_1.01,0.0640447974205017
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,1,64,power_law_1.01,0.0750656008720398
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,1,64,power_law_1.01,0.07771520018577575
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,1,64,power_law_1.01,0.0950976014137268
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,1,64,power_law_1.01,0.11022080183029175
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,1,64,power_law_1.01,0.1388864040374756
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,1,64,power_law_1.01,0.16491520404815674
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,1,64,power_law_1.01,0.21429119110107422
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,1,64,power_law_1.01,0.2638015985488892
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,1,64,power_law_1.01,0.3758591890335083
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,1,64,power_law_1.01,0.473145580291748
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,2,1,power_law_1.2,0.06223359704017639
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,2,1,power_law_1.2,0.07731840014457703
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,2,1,power_law_1.2,0.10549759864807129
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,2,1,power_law_1.2,0.15816320180892945
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,2,1,power_law_1.2,0.21575679779052734
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,2,1,power_law_1.2,0.27596800327301024
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,2,1,power_law_1.2,0.3870719909667969
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,2,1,power_law_1.2,0.4193535804748535
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,2,1,power_law_1.2,0.4320767879486084
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,2,1,power_law_1.2,0.449721622467041
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,2,1,power_law_1.2,0.4579904079437256
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,2,1,power_law_1.2,0.4766719818115234
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,2,1,power_law_1.2,0.482912015914917
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,2,1,power_law_1.2,0.5027391910552979
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,2,1,power_law_1.2,0.5163648128509521
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,2,1,power_law_1.2,0.5314496040344239
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,2,1,power_law_1.2,0.5711040019989013
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,2,1,power_law_1.2,0.6222335815429687
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,2,1,power_law_1.2,0.6223296165466309
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,2,1,power_law_1.2,0.7282432079315185
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,2,1,power_law_1.2,0.7301439762115478
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,2,1,power_law_1.2,0.8606719970703125
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,2,1,power_law_1.2,0.9737728118896485
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,2,1,power_law_1.2,1.2233728408813476
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,2,1,power_law_1.2,1.468665599822998
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,2,1,power_law_1.2,1.9481151580810547
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,2,1,power_law_1.2,2.4717695236206056
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,8,2,balanced,0.0414986660083135
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,8,2,balanced,0.04350399971008301
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,8,2,balanced,0.04404266675313314
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,8,2,balanced,0.0509493350982666
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,8,2,balanced,0.06887466708819072
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,8,2,balanced,0.09172800183296204
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,8,2,balanced,0.09539199868837993
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,8,2,balanced,0.095360000928243
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,8,2,balanced,0.09706667065620422
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,8,2,balanced,0.09696533282597859
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,8,2,balanced,0.099589337905248
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,8,2,balanced,0.10156800349553426
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,8,2,balanced,0.10113599896430969
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,8,2,balanced,0.10678399602572124
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,8,2,balanced,0.11244799693425496
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,8,2,balanced,0.11597333351771037
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,8,2,balanced,0.12402666608492534
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,8,2,balanced,0.14039466778437296
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,8,2,balanced,0.15401066342989603
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,8,2,balanced,0.18429332971572876
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,8,2,balanced,0.20619734128316244
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,8,2,balanced,0.2866186698277791
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,8,2,balanced,0.32740267117818195
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,8,2,balanced,0.4779626528422038
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,8,2,balanced,0.5854133367538452
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,8,2,balanced,0.8637813727060953
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,8,2,balanced,1.1319093704223633
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,2,2,balanced,0.064410666624705
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,2,2,balanced,0.07271466652552287
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,2,2,balanced,0.084714670976003
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,2,2,balanced,0.11910399794578552
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,2,2,balanced,0.16861865917841592
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,2,2,balanced,0.25300800800323486
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,2,2,balanced,0.24476800362269083
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,2,2,balanced,0.24239999055862427
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,2,2,balanced,0.23323200146357217
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,2,2,balanced,0.23057599862416586
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,2,2,balanced,0.23015467325846353
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,2,2,balanced,0.23162666956583658
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,2,2,balanced,0.22906132539113364
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,2,2,balanced,0.23718933264414468
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,2,2,balanced,0.23481067021687826
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,2,2,balanced,0.23656533161799112
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,2,2,balanced,0.2576693296432495
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,2,2,balanced,0.2612373431523641
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,2,2,balanced,0.281274676322937
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,2,2,balanced,0.29732267061869305
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,2,2,balanced,0.32657066980997723
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,2,2,balanced,0.3887573480606079
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,2,2,balanced,0.4543466567993164
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,2,2,balanced,0.6872853438059489
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,2,2,balanced,0.8068106969197592
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,2,2,balanced,1.1665759881337483
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,2,2,balanced,1.500325361887614
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,32,1,power_law_1.01,0.04996480047702789
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,32,1,power_law_1.01,0.05136640071868896
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,32,1,power_law_1.01,0.05306879878044128
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,32,1,power_law_1.01,0.06401280164718628
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,32,1,power_law_1.01,0.07584000229835511
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,32,1,power_law_1.01,0.08723199963569642
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,32,1,power_law_1.01,0.102566397190094
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,32,1,power_law_1.01,0.10561280250549317
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,32,1,power_law_1.01,0.10892159938812256
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,32,1,power_law_1.01,0.11105920076370239
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,32,1,power_law_1.01,0.1152448058128357
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,32,1,power_law_1.01,0.11850240230560302
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,32,1,power_law_1.01,0.12095359563827515
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,32,1,power_law_1.01,0.12479360103607177
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,32,1,power_law_1.01,0.13214080333709716
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,32,1,power_law_1.01,0.13720959424972534
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,32,1,power_law_1.01,0.14387840032577515
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,32,1,power_law_1.01,0.17769600152969361
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,32,1,power_law_1.01,0.18063360452651978
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,32,1,power_law_1.01,0.2370687961578369
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,32,1,power_law_1.01,0.2849600076675415
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,32,1,power_law_1.01,0.35944960117340086
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,32,1,power_law_1.01,0.4362175941467285
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,32,1,power_law_1.01,0.5987520217895508
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,32,1,power_law_1.01,0.7465792179107666
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,32,1,power_law_1.01,1.0753664016723632
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,32,1,power_law_1.01,1.3802687644958496
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,2,16,power_law_1.2,0.05411199927330017
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,2,16,power_law_1.2,0.069651198387146
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,2,16,power_law_1.2,0.06670719981193543
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,2,16,power_law_1.2,0.07445759773254394
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,2,16,power_law_1.2,0.07278720140457154
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,2,16,power_law_1.2,0.06844800114631652
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,2,16,power_law_1.2,0.06910079717636108
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,2,16,power_law_1.2,0.07285119891166687
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,2,16,power_law_1.2,0.07203840017318726
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,2,16,power_law_1.2,0.07260800004005433
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,2,16,power_law_1.2,0.07649279832839966
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,2,16,power_law_1.2,0.07975680232048035
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,2,16,power_law_1.2,0.07935360074043274
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,2,16,power_law_1.2,0.08163840174674988
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,2,16,power_law_1.2,0.08689919710159302
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,2,16,power_law_1.2,0.08975359797477722
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,2,16,power_law_1.2,0.09141759872436524
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,2,16,power_law_1.2,0.1045375943183899
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,2,16,power_law_1.2,0.11273599863052368
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,2,16,power_law_1.2,0.12510080337524415
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,2,16,power_law_1.2,0.13466880321502686
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,2,16,power_law_1.2,0.16494719982147216
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,2,16,power_law_1.2,0.18447999954223632
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,2,16,power_law_1.2,0.24795520305633545
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,2,16,power_law_1.2,0.31953279972076415
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,2,16,power_law_1.2,0.4255040168762207
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,2,16,power_law_1.2,0.5736703872680664
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,4,32,balanced,0.03988266736268997
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,4,32,balanced,0.03775466730197271
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,4,32,balanced,0.037690666814645134
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,4,32,balanced,0.03984000037113825
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,4,32,balanced,0.043696001172065735
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,4,32,balanced,0.04480533301830292
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,4,32,balanced,0.04597333570321401
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,4,32,balanced,0.04780266682306925
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,4,32,balanced,0.0480320006608963
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,4,32,balanced,0.04773866633574168
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,4,32,balanced,0.04993066688378652
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,4,32,balanced,0.04967466493447622
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,4,32,balanced,0.047824000318845115
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,4,32,balanced,0.0516533354918162
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,4,32,balanced,0.05412266651789347
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,4,32,balanced,0.05603733162085215
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,4,32,balanced,0.0598826656738917
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,4,32,balanced,0.06398933132489522
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,4,32,balanced,0.0662666658560435
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,4,32,balanced,0.07683200140794118
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,4,32,balanced,0.08477866649627686
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,4,32,balanced,0.10289600491523743
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,4,32,balanced,0.13146133224169412
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,4,32,balanced,0.1802026629447937
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,4,32,balanced,0.2237173318862915
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,4,32,balanced,0.30823999643325806
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,4,32,balanced,0.39796801408131915
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,1,2,power_law_1.01,0.10798720121383668
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,1,2,power_law_1.01,0.1304128050804138
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,1,2,power_law_1.01,0.15061119794845582
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,1,2,power_law_1.01,0.1856063961982727
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,1,2,power_law_1.01,0.21531519889831544
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,1,2,power_law_1.01,0.2518656015396118
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,1,2,power_law_1.01,0.31291520595550537
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,1,2,power_law_1.01,0.32943999767303467
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,1,2,power_law_1.01,0.32887680530548097
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,1,2,power_law_1.01,0.3311295986175537
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,1,2,power_law_1.01,0.35212159156799316
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,1,2,power_law_1.01,0.35886080265045167
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,1,2,power_law_1.01,0.3382143974304199
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,1,2,power_law_1.01,0.3699199914932251
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,1,2,power_law_1.01,0.37048959732055664
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,1,2,power_law_1.01,0.3749567985534668
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,1,2,power_law_1.01,0.3695103883743286
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,1,2,power_law_1.01,0.40999040603637693
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,1,2,power_law_1.01,0.42488961219787597
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,1,2,power_law_1.01,0.47788801193237307
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,1,2,power_law_1.01,0.5211967945098877
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,1,2,power_law_1.01,0.6118656158447265
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,1,2,power_law_1.01,0.7339903831481933
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,1,2,power_law_1.01,0.9727231979370117
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,1,2,power_law_1.01,1.1838015556335448
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,1,2,power_law_1.01,1.7091840744018554
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,1,2,power_law_1.01,2.1602048873901367
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,1,256,power_law_1.01,0.052185600996017455
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,1,256,power_law_1.01,0.05255680084228516
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,1,256,power_law_1.01,0.05309439897537231
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,1,256,power_law_1.01,0.05570560097694397
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,1,256,power_law_1.01,0.05483520030975342
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,1,256,power_law_1.01,0.05533440113067627
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,1,256,power_law_1.01,0.05548800230026245
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,1,256,power_law_1.01,0.0553600013256073
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,1,256,power_law_1.01,0.05482879877090454
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,1,256,power_law_1.01,0.05633919835090637
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,1,256,power_law_1.01,0.05625600218772888
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,1,256,power_law_1.01,0.05619199872016907
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,1,256,power_law_1.01,0.05651199817657471
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,1,256,power_law_1.01,0.056806397438049314
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,1,256,power_law_1.01,0.05923200249671936
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,1,256,power_law_1.01,0.05914880037307739
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,1,256,power_law_1.01,0.061478400230407716
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,1,256,power_law_1.01,0.06877440214157104
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,1,256,power_law_1.01,0.06863359808921814
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,1,256,power_law_1.01,0.08012160062789916
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,1,256,power_law_1.01,0.0874559998512268
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,1,256,power_law_1.01,0.10918400287628174
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,1,256,power_law_1.01,0.132096004486084
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,1,256,power_law_1.01,0.17877119779586792
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,1,256,power_law_1.01,0.2189311981201172
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,1,256,power_law_1.01,0.3068928003311157
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,1,256,power_law_1.01,0.3982208013534546
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,8,1,balanced,0.049866666396458946
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,8,1,balanced,0.05208533505598704
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,8,1,balanced,0.05921066800753275
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,8,1,balanced,0.08408533533414204
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,8,1,balanced,0.12161067128181458
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,8,1,balanced,0.1888213356335958
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,8,1,balanced,0.18988800048828125
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,8,1,balanced,0.1946880022684733
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,8,1,balanced,0.19502933820088705
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,8,1,balanced,0.19699732462565103
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,8,1,balanced,0.20153067509333292
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,8,1,balanced,0.2076746622721354
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,8,1,balanced,0.20543466011683145
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,8,1,balanced,0.21173334121704102
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,8,1,balanced,0.22392000754674277
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,8,1,balanced,0.2221333384513855
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,8,1,balanced,0.23145065704981485
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,8,1,balanced,0.25641600290934247
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,8,1,balanced,0.27264533440272015
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,8,1,balanced,0.31811734040578205
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,8,1,balanced,0.35316268603007
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,8,1,balanced,0.46876800060272217
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,8,1,balanced,0.522277315457662
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,8,1,balanced,0.7383999824523926
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,8,1,balanced,0.8890133698781332
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,8,1,balanced,1.263045310974121
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,8,1,balanced,1.6258452733357747
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,4,16,balanced,0.04320533573627472
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,4,16,balanced,0.04412800073623657
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,4,16,balanced,0.043824002146720886
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,4,16,balanced,0.0462666650613149
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,4,16,balanced,0.051685333251953125
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,4,16,balanced,0.05363733569780985
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,4,16,balanced,0.053898667295773826
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,4,16,balanced,0.05596800148487091
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,4,16,balanced,0.05603733162085215
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,4,16,balanced,0.05402133365472158
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,4,16,balanced,0.05596266686916351
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,4,16,balanced,0.05604266623655955
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,4,16,balanced,0.056048000852266945
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,4,16,balanced,0.058101331194241844
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,4,16,balanced,0.06222933530807495
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,4,16,balanced,0.0641653339068095
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,4,16,balanced,0.06651199857393901
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,4,16,balanced,0.07274133463700612
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,4,16,balanced,0.07622399926185608
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,4,16,balanced,0.08701866865158081
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,4,16,balanced,0.09904000163078308
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,4,16,balanced,0.11363200346628825
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,4,16,balanced,0.14446933070818582
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,4,16,balanced,0.19604265689849854
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,4,16,balanced,0.22910400231679282
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,4,16,balanced,0.31389333804448444
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,4,16,balanced,0.39977065722147626
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,2,8,power_law_1.2,0.020294399559497835
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,2,8,power_law_1.2,0.02412160038948059
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,2,8,power_law_1.2,0.02678399980068207
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,2,8,power_law_1.2,0.029203200340270997
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,2,8,power_law_1.2,0.031692799925804135
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,2,8,power_law_1.2,0.034790399670600894
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,2,8,power_law_1.2,0.037376001477241516
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,2,8,power_law_1.2,0.034668800234794614
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,2,8,power_law_1.2,0.04019840061664581
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,2,8,power_law_1.2,0.03829759955406189
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,2,8,power_law_1.2,0.0381632000207901
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,2,8,power_law_1.2,0.057708799839019775
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,2,8,power_law_1.2,0.05230720043182373
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,2,8,power_law_1.2,0.052198398113250735
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,2,8,power_law_1.2,0.07112320065498352
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,2,8,power_law_1.2,0.0701312005519867
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,2,8,power_law_1.2,0.07066239714622498
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,2,8,power_law_1.2,0.08391680121421814
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,2,8,power_law_1.2,0.07987840175628662
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,2,8,power_law_1.2,0.08485760092735291
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,2,8,power_law_1.2,0.08715519905090333
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,2,8,power_law_1.2,0.09658880233764648
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,2,8,power_law_1.2,0.11696640253067017
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,2,8,power_law_1.2,0.1411072015762329
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,2,8,power_law_1.2,0.17375999689102173
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,2,8,power_law_1.2,0.249235200881958
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,2,8,power_law_1.2,0.3077568054199219
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,4,4,power_law_1.01,0.043289598822593686
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,4,4,power_law_1.01,0.04805760085582733
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,4,4,power_law_1.01,0.05043839812278748
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,4,4,power_law_1.01,0.061715197563171384
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,4,4,power_law_1.01,0.069651198387146
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,4,4,power_law_1.01,0.06980479955673217
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,4,4,power_law_1.01,0.07837439775466919
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,4,4,power_law_1.01,0.08247680068016053
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,4,4,power_law_1.01,0.08440960049629212
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,4,4,power_law_1.01,0.08389760255813598
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,4,4,power_law_1.01,0.08234239816665649
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,4,4,power_law_1.01,0.08672000169754028
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,4,4,power_law_1.01,0.0915008008480072
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,4,4,power_law_1.01,0.09435520172119141
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,4,4,power_law_1.01,0.10236799716949463
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,4,4,power_law_1.01,0.10506880283355713
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,4,4,power_law_1.01,0.10741759538650512
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,4,4,power_law_1.01,0.12312959432601929
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,4,4,power_law_1.01,0.13219200372695922
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,4,4,power_law_1.01,0.16011519432067872
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,4,4,power_law_1.01,0.17991679906845093
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,4,4,power_law_1.01,0.22746880054473878
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,4,4,power_law_1.01,0.27594239711761476
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,4,4,power_law_1.01,0.3845952033996582
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,4,4,power_law_1.01,0.4854015827178955
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,4,4,power_law_1.01,0.6590720176696777
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,4,4,power_law_1.01,0.8505215644836426
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,1,4,balanced,0.0236160010099411
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,1,4,balanced,0.025466665625572205
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,1,4,balanced,0.029493334392706554
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,1,4,balanced,0.043866669138272606
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,1,4,balanced,0.0609493354956309
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,1,4,balanced,0.09485333164532979
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,1,4,balanced,0.0981226662794749
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,1,4,balanced,0.1011946698029836
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,1,4,balanced,0.10362133383750916
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,1,4,balanced,0.1051573355992635
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,1,4,balanced,0.10909333825111389
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,1,4,balanced,0.13730133573214212
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,1,4,balanced,0.13611732920010886
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,1,4,balanced,0.13702399532000223
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,1,4,balanced,0.20376000801722208
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,1,4,balanced,0.19631999731063843
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,1,4,balanced,0.2009333372116089
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,1,4,balanced,0.23586666584014893
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,1,4,balanced,0.2627840042114258
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,1,4,balanced,0.30131200949350995
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,1,4,balanced,0.31228800614674884
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,1,4,balanced,0.3275039990743001
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,1,4,balanced,0.3468746741612752
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,1,4,balanced,0.35161598523457843
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,1,4,balanced,0.3729493220647176
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,1,4,balanced,0.6614240010579427
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,1,4,balanced,0.6867199738820394
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,8,2,balanced,0.05609600245952606
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,8,2,balanced,0.05821333328882853
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,8,2,balanced,0.062352001667022705
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,8,2,balanced,0.07025066514809926
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,8,2,balanced,0.0867199997107188
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,8,2,balanced,0.11786666512489319
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,8,2,balanced,0.11945066849390666
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,8,2,balanced,0.11999467015266418
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,8,2,balanced,0.12050132950146993
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,8,2,balanced,0.1216266651948293
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,8,2,balanced,0.12328533331553142
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,8,2,balanced,0.12452800075213115
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,8,2,balanced,0.12351999680201213
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,8,2,balanced,0.13009066383043924
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,8,2,balanced,0.13434666395187378
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,8,2,balanced,0.13685333728790283
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,8,2,balanced,0.14417066176732382
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,8,2,balanced,0.1609599987665812
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,8,2,balanced,0.17417067289352417
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,8,2,balanced,0.2034133275349935
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,8,2,balanced,0.22923733790715536
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,8,2,balanced,0.2928746740023295
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,8,2,balanced,0.35386133193969727
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,8,2,balanced,0.5171093146006266
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,8,2,balanced,0.6447733243306478
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,8,2,balanced,0.9388266404469808
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,8,2,balanced,1.2188159624735515
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,8,32,balanced,0.03987200061480204
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,8,32,balanced,0.03968533376852671
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,8,32,balanced,0.03996799886226654
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,8,32,balanced,0.042026668787002563
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,8,32,balanced,0.04603200157483419
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,8,32,balanced,0.04582933088143667
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,8,32,balanced,0.04717333118120829
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,8,32,balanced,0.04783466458320618
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,8,32,balanced,0.04765866696834564
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,8,32,balanced,0.047824000318845115
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,8,32,balanced,0.04979733129342397
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,8,32,balanced,0.04980266590913137
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,8,32,balanced,0.049600000182787575
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,8,32,balanced,0.05166399975617727
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,8,32,balanced,0.05403199791908264
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,8,32,balanced,0.05584533512592316
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,8,32,balanced,0.0595360000928243
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,8,32,balanced,0.06392000118891399
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,8,32,balanced,0.06603200236956279
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,8,32,balanced,0.0762613316377004
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,8,32,balanced,0.08275733391443889
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,8,32,balanced,0.10094400246938069
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,8,32,balanced,0.12009599804878235
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,8,32,balanced,0.17695999145507812
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,8,32,balanced,0.21766932805379233
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,8,32,balanced,0.3059946695963542
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,8,32,balanced,0.3927786747614543
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,8,32,power_law_1.2,0.017766399681568144
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,8,32,power_law_1.2,0.018016000092029572
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,8,32,power_law_1.2,0.01761920005083084
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,8,32,power_law_1.2,0.017523199319839478
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,8,32,power_law_1.2,0.018323199450969697
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,8,32,power_law_1.2,0.01886720061302185
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,8,32,power_law_1.2,0.02207999974489212
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,8,32,power_law_1.2,0.021939200162887574
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,8,32,power_law_1.2,0.03523840010166168
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,8,32,power_law_1.2,0.035366401076316833
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,8,32,power_law_1.2,0.03603839874267578
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,8,32,power_law_1.2,0.031692799925804135
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,8,32,power_law_1.2,0.03131519854068756
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,8,32,power_law_1.2,0.030982398986816408
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,8,32,power_law_1.2,0.03203200101852417
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,8,32,power_law_1.2,0.03297280073165894
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,8,32,power_law_1.2,0.03361279964447021
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,8,32,power_law_1.2,0.034508800506591795
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,8,32,power_law_1.2,0.03808000087738037
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,8,32,power_law_1.2,0.04366720020771027
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,8,32,power_law_1.2,0.04577920138835907
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,8,32,power_law_1.2,0.056953597068786624
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,8,32,power_law_1.2,0.06917759776115417
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,8,32,power_law_1.2,0.0907584011554718
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,8,32,power_law_1.2,0.11407999992370606
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,8,32,power_law_1.2,0.15827200412750245
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,8,32,power_law_1.2,0.19736319780349731
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,2,8,power_law_1.2,0.057734400033950806
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,2,8,power_law_1.2,0.07284479737281799
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,2,8,power_law_1.2,0.07445120215415954
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,2,8,power_law_1.2,0.08521599769592285
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,2,8,power_law_1.2,0.0974016010761261
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,2,8,power_law_1.2,0.10685440301895141
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,2,8,power_law_1.2,0.10275839567184449
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,2,8,power_law_1.2,0.10030720233917237
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,2,8,power_law_1.2,0.09834240078926086
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,2,8,power_law_1.2,0.09826560020446777
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,2,8,power_law_1.2,0.1041856050491333
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,2,8,power_law_1.2,0.10394879579544067
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,2,8,power_law_1.2,0.10798720121383668
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,2,8,power_law_1.2,0.10944000482559205
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,2,8,power_law_1.2,0.11516159772872925
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,2,8,power_law_1.2,0.11656960248947143
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,2,8,power_law_1.2,0.12362879514694214
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,2,8,power_law_1.2,0.13691519498825072
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,2,8,power_law_1.2,0.14425599575042725
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,8,16,balanced,0.019359999646743137
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,8,16,balanced,0.019482667247454327
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,8,16,balanced,0.019365333020687103
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,2,8,power_law_1.2,0.16424319744110108
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,8,16,balanced,0.019519999623298645
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,8,16,balanced,0.02128000060717265
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,8,16,balanced,0.022858666876951855
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,8,16,balanced,0.023743999501069386
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,8,16,balanced,0.023541333774725597
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,2,8,power_law_1.2,0.1843008041381836
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,8,16,balanced,0.02924266705910365
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,8,16,balanced,0.029301332930723827
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,8,16,balanced,0.029274667302767437
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,8,16,balanced,0.041797334949175514
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,8,16,balanced,0.04146133363246918
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,8,16,balanced,0.039664000272750854
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,8,16,balanced,0.04382933179537455
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,8,16,balanced,0.04379733403523763
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,8,16,balanced,0.043712000052134194
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,8,16,balanced,0.047594666481018066
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,8,16,balanced,0.04979733129342397
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,8,16,balanced,0.058650667468706764
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,8,16,balanced,0.06974400083223979
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,8,16,balanced,0.0817386656999588
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,8,16,balanced,0.09703466296195984
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,8,16,balanced,0.12380799651145935
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,8,16,balanced,0.14665599664052328
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,8,16,balanced,0.2132906715075175
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,8,16,balanced,0.2656906644503276
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,2,8,power_law_1.2,0.22263679504394532
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,2,8,power_law_1.2,0.24893438816070557
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,2,8,power_law_1.2,0.3349503993988037
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,2,8,power_law_1.2,0.38311679363250734
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,2,8,power_law_1.2,0.5419136047363281
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,2,8,power_law_1.2,0.7037824153900146
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,2,4,power_law_1.01,0.056831997632980344
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,2,4,power_law_1.01,0.07344639897346497
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,2,4,power_law_1.01,0.08030080199241638
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,2,4,power_law_1.01,0.10955519676208496
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,2,4,power_law_1.01,0.11383039951324463
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,2,4,power_law_1.01,0.12894079685211182
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,2,4,power_law_1.01,0.1599552035331726
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,2,4,power_law_1.01,0.14913920164108277
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,2,4,power_law_1.01,0.14859520196914672
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,2,4,power_law_1.01,0.1486400008201599
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,2,4,power_law_1.01,0.15994240045547486
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,2,4,power_law_1.01,0.15944960117340087
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,2,4,power_law_1.01,0.16054400205612182
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,2,4,power_law_1.01,0.168723201751709
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,2,4,power_law_1.01,0.17558399438858033
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,2,4,power_law_1.01,0.1745792031288147
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,2,4,power_law_1.01,0.1805184006690979
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,2,4,power_law_1.01,0.19630080461502075
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,2,4,power_law_1.01,0.20404479503631592
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,2,4,power_law_1.01,0.2336575984954834
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,2,4,power_law_1.01,0.24977281093597412
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,2,4,power_law_1.01,0.30475521087646484
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,2,4,power_law_1.01,0.3502336025238037
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,2,4,power_law_1.01,0.43042559623718263
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,2,4,power_law_1.01,0.5266560077667236
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,2,4,power_law_1.01,0.7144320011138916
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,2,4,power_law_1.01,0.8875328063964844
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,4,1,balanced,0.019861333072185516
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,4,1,balanced,0.021162666380405426
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,4,1,balanced,0.023306667804718018
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,4,1,balanced,0.027466667195161183
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,4,1,balanced,0.04188799858093262
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,4,1,balanced,0.06557333469390869
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,4,1,balanced,0.0659093310435613
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,4,1,balanced,0.06774400174617767
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,4,1,balanced,0.06832533578077953
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,4,1,balanced,0.07095466554164886
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,4,1,balanced,0.07206933200359344
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,4,1,balanced,0.07285333176453908
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,4,1,balanced,0.07668266693751018
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,4,1,balanced,0.08025066554546356
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,4,1,balanced,0.08455999692281087
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,4,1,balanced,0.08933333555857341
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,4,1,balanced,0.09095999598503113
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,4,1,balanced,0.09825600186983745
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,4,1,balanced,0.10672533512115479
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,4,1,balanced,0.12569066882133484
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,4,1,balanced,0.142277330160141
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,4,1,balanced,0.20200000206629434
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,4,1,balanced,0.18111467361450195
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,4,1,balanced,0.23895466327667236
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,4,1,balanced,0.30051199595133465
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,4,1,balanced,0.4259466727574666
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,4,1,balanced,0.5516053438186646
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,4,64,balanced,0.039642666776975
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,4,64,balanced,0.03570133447647095
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,4,64,balanced,0.03608533243338267
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,4,64,balanced,0.03994133323431015
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,4,64,balanced,0.037621334195137024
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,4,64,balanced,0.04303466777006785
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,4,64,balanced,0.042037333051363625
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,4,64,balanced,0.04204266766707102
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,4,64,balanced,0.04240000247955322
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,4,64,balanced,0.04359466830889384
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,4,64,balanced,0.04385599990685781
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,4,64,balanced,0.04568000137805939
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,4,64,balanced,0.04594666759173075
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,4,64,balanced,0.0467199981212616
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,4,64,balanced,0.05013866722583771
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,4,64,balanced,0.05209066470464071
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,4,64,balanced,0.05190933247407278
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,4,64,balanced,0.05811200042565664
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,4,64,balanced,0.05986666679382324
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,4,64,balanced,0.06810666620731354
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,4,64,balanced,0.07329600056012471
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,4,64,balanced,0.08476799726486206
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,4,64,balanced,0.0972053309281667
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,4,64,balanced,0.12808533509572348
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,4,64,balanced,0.15502933661142984
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,4,64,balanced,0.21266132593154907
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,4,64,balanced,0.26291199525197345
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,2,8,power_law_1.2,0.05921279788017273
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,2,8,power_law_1.2,0.07541120052337646
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,2,8,power_law_1.2,0.07437440156936645
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,2,8,power_law_1.2,0.08812159895896912
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,2,8,power_law_1.2,0.0906112015247345
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,2,8,power_law_1.2,0.09036160111427308
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,2,8,power_law_1.2,0.09433599710464477
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,2,8,power_law_1.2,0.09442560076713562
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,2,8,power_law_1.2,0.09693440198898315
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,2,8,power_law_1.2,0.09430400133132935
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,2,8,power_law_1.2,0.09833599925041199
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,2,8,power_law_1.2,0.0985535979270935
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,2,8,power_law_1.2,0.09841279983520508
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,2,8,power_law_1.2,0.1028223991394043
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,2,8,power_law_1.2,0.10729600191116333
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,2,8,power_law_1.2,0.10734080076217652
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,2,8,power_law_1.2,0.1124608039855957
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,2,8,power_law_1.2,0.1255295991897583
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,2,8,power_law_1.2,0.13204480409622193
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,2,8,power_law_1.2,0.16535680294036864
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,2,8,power_law_1.2,0.19319039583206177
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,2,8,power_law_1.2,0.23277440071105956
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,2,8,power_law_1.2,0.3004863977432251
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,2,8,power_law_1.2,0.40440959930419923
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,2,8,power_law_1.2,0.5278592109680176
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,2,8,power_law_1.2,0.7450751781463623
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,2,8,power_law_1.2,1.0759936332702638
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,8,32,power_law_1.01,0.044870400428771974
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,8,32,power_law_1.01,0.040703999996185306
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,8,32,power_law_1.01,0.04284160137176514
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,8,32,power_law_1.01,0.041433599591255185
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,8,32,power_law_1.01,0.04131839871406555
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,8,32,power_law_1.01,0.04302079975605011
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,8,32,power_law_1.01,0.04318720102310181
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,8,32,power_law_1.01,0.044588801264762876
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,8,32,power_law_1.01,0.044787201285362246
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,8,32,power_law_1.01,0.045151999592781066
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,8,32,power_law_1.01,0.04470399916172028
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,8,32,power_law_1.01,0.04508799910545349
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,8,32,power_law_1.01,0.04599039852619171
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,8,32,power_law_1.01,0.04669440090656281
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,8,32,power_law_1.01,0.05041279792785645
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,8,32,power_law_1.01,0.05188480019569397
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,8,32,power_law_1.01,0.05475839972496033
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,8,32,power_law_1.01,0.059359997510910034
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,8,32,power_law_1.01,0.06330879926681518
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,8,32,power_law_1.01,0.0725823998451233
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,8,32,power_law_1.01,0.0821951985359192
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,8,32,power_law_1.01,0.10623999834060668
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,8,32,power_law_1.01,0.12240639925003052
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,8,32,power_law_1.01,0.16568319797515868
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,8,32,power_law_1.01,0.21004159450531007
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,8,32,power_law_1.01,0.2877887964248657
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,8,32,power_law_1.01,0.3504575967788696
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,2,64,balanced,0.05269866685072581
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,2,64,balanced,0.04167999823888143
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,2,64,balanced,0.04144000013669332
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,2,64,balanced,0.043925335009892784
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,2,64,balanced,0.04364266494909922
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,2,64,balanced,0.04834666848182678
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,2,64,balanced,0.049098665515581764
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,2,64,balanced,0.049685334165891014
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,2,64,balanced,0.0496319979429245
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,2,64,balanced,0.051130667328834534
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,2,64,balanced,0.04994133114814758
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,2,64,balanced,0.05147733290990194
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,2,64,balanced,0.05189866820971171
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,2,64,balanced,0.052330667773882546
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,2,64,balanced,0.056015998125076294
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,2,64,balanced,0.05685333410898844
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,2,64,balanced,0.05606399973233541
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,2,64,balanced,0.06005333364009857
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,2,64,balanced,0.06214933097362518
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,2,64,balanced,0.06849599877993266
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,2,64,balanced,0.0764160007238388
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,2,64,balanced,0.0916266640027364
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,2,64,balanced,0.10531733433405559
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,2,64,balanced,0.13983466227849325
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,2,64,balanced,0.16869332393010458
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,2,64,balanced,0.22210667530695596
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,2,64,balanced,0.2731093366940816
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,4,1,power_law_1.01,0.047942399978637695
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,4,1,power_law_1.01,0.053376001119613645
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,4,1,power_law_1.01,0.06094080209732056
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,4,1,power_law_1.01,0.08504319787025452
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,4,1,power_law_1.01,0.10721919536590577
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,4,1,power_law_1.01,0.1242751955986023
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,4,1,power_law_1.01,0.16001919507980347
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,4,1,power_law_1.01,0.1651520013809204
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,4,1,power_law_1.01,0.16381440162658692
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,4,1,power_law_1.01,0.171942400932312
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,4,1,power_law_1.01,0.1796671986579895
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,4,1,power_law_1.01,0.18584959506988524
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,4,1,power_law_1.01,0.19073920249938964
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,4,1,power_law_1.01,0.20083200931549072
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,4,1,power_law_1.01,0.21264638900756835
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,4,1,power_law_1.01,0.221779203414917
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,4,1,power_law_1.01,0.23350400924682618
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,4,1,power_law_1.01,0.2682624101638794
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,4,1,power_law_1.01,0.29215359687805176
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,4,1,power_law_1.01,0.36037120819091795
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,4,1,power_law_1.01,0.41468157768249514
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,4,1,power_law_1.01,0.5364352226257324
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,4,1,power_law_1.01,0.640064001083374
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,4,1,power_law_1.01,0.8710847854614258
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,4,1,power_law_1.01,1.1038271903991699
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,4,1,power_law_1.01,1.5498623847961426
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,4,1,power_law_1.01,1.9871679306030274
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,1,256,balanced,0.019381333142518997
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,1,256,balanced,0.019317333896954853
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,1,256,balanced,0.02073066681623459
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,1,256,balanced,0.023200000325838726
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,1,256,balanced,0.035589332381884255
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,1,256,balanced,0.031109333038330078
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,1,256,balanced,0.03126933425664902
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,1,256,balanced,0.03165333221356074
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,1,256,balanced,0.03166399896144867
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,1,256,balanced,0.03141333411137263
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,1,256,balanced,0.03192000091075897
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,1,256,balanced,0.03201066702604294
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,1,256,balanced,0.03162133445342382
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,1,256,balanced,0.03165333221356074
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,1,256,balanced,0.03548266738653183
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,1,256,balanced,0.03578133384386698
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,1,256,balanced,0.03356266766786575
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,1,256,balanced,0.0378506655494372
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,1,256,balanced,0.039264000952243805
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,1,256,balanced,0.04182933270931244
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,1,256,balanced,0.05213333169619242
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,1,256,balanced,0.061861331264177956
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,1,256,balanced,0.07647466659545898
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,1,256,balanced,0.10680533448855083
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,1,256,balanced,0.13173333803812662
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,1,256,balanced,0.1848906675974528
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,1,256,balanced,0.23830399910608926
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,8,16,balanced,0.03974399964014689
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,8,16,balanced,0.041562666495641075
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,8,16,balanced,0.04181333382924398
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,8,16,balanced,0.042581334710121155
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,8,16,balanced,0.04605866471926371
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,8,16,balanced,0.04789866507053375
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,8,16,balanced,0.04996799925963084
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,8,16,balanced,0.049914668003718056
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,8,16,balanced,0.0516480008761088
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,8,16,balanced,0.04993600149949392
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,8,16,balanced,0.05193066596984863
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,8,16,balanced,0.052000001072883606
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,8,16,balanced,0.05197333296140035
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,8,16,balanced,0.055919999877611794
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,8,16,balanced,0.05821333328882853
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,8,16,balanced,0.05816533168156942
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,8,16,balanced,0.062319998939832054
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,8,16,balanced,0.06817066669464111
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,8,16,balanced,0.07027199864387512
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,8,16,balanced,0.081535999973615
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,8,16,balanced,0.09088533123334248
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,8,16,balanced,0.11329600214958191
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,8,16,balanced,0.1476693352063497
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,8,16,balanced,0.2076266606648763
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,8,16,balanced,0.24380266666412354
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,8,16,balanced,0.34141333897908527
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,8,16,balanced,0.4390133221944173
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,1,1,balanced,0.10421866178512573
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,1,1,balanced,0.13616533080736795
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,1,1,balanced,0.19103467464447021
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,1,1,balanced,0.27244265874226886
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,1,1,balanced,0.4245813290278117
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,1,1,balanced,0.6935413678487142
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,1,1,balanced,0.6776533126831055
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,1,1,balanced,0.675551970799764
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,1,1,balanced,0.6375093460083008
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,1,1,balanced,0.6296746730804443
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,1,1,balanced,0.643120010693868
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,1,1,balanced,0.6419306596120199
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,1,1,balanced,0.6383839845657349
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,1,1,balanced,0.6516960064570109
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,1,1,balanced,0.6594506502151489
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,1,1,balanced,0.6642026503880819
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,1,1,balanced,0.6894240379333496
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,1,1,balanced,0.7052533626556396
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,1,1,balanced,0.7461280028025309
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,1,1,balanced,0.7871786753336588
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,1,1,balanced,0.8422826925913492
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,1,1,balanced,0.9609013398488363
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,1,1,balanced,1.0896906852722168
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,1,1,balanced,1.7713653246561687
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,1,1,balanced,1.9907093048095703
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,1,1,balanced,2.9711198806762695
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,1,1,balanced,3.6175146102905273
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,1,256,power_law_1.01,0.043507200479507444
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,1,256,power_law_1.01,0.04471679925918579
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,1,256,power_law_1.01,0.04694400131702423
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,1,256,power_law_1.01,0.04688639938831329
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,1,256,power_law_1.01,0.04695039987564087
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,1,256,power_law_1.01,0.04867199957370758
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,1,256,power_law_1.01,0.049292799830436704
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,1,256,power_law_1.01,0.050419199466705325
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,1,256,power_law_1.01,0.05027840137481689
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,1,256,power_law_1.01,0.0506496012210846
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,1,256,power_law_1.01,0.051545602083206174
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,1,256,power_law_1.01,0.05135999917984009
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,1,256,power_law_1.01,0.05237119793891907
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,1,256,power_law_1.01,0.05316479802131653
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,1,256,power_law_1.01,0.056704002618789676
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,1,256,power_law_1.01,0.056409597396850586
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,1,256,power_law_1.01,0.05983359813690185
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,1,256,power_law_1.01,0.06584320068359376
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,1,256,power_law_1.01,0.07022079825401306
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,1,256,power_law_1.01,0.08082559704780579
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,1,256,power_law_1.01,0.09088000059127807
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,1,256,power_law_1.01,0.11437439918518066
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,1,256,power_law_1.01,0.14060159921646118
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,1,256,power_law_1.01,0.2038719892501831
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,1,256,power_law_1.01,0.24963839054107667
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,1,256,power_law_1.01,0.36350719928741454
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,1,256,power_law_1.01,0.4502272129058838
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,8,1,power_law_1.01,0.018028800189495087
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,8,1,power_law_1.01,0.019167999923229217
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,8,1,power_law_1.01,0.021631999313831328
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,8,1,power_law_1.01,0.027827200293540955
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,8,1,power_law_1.01,0.03343999981880188
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,8,1,power_law_1.01,0.04456959962844849
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,8,1,power_law_1.01,0.053855997323989865
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,8,1,power_law_1.01,0.0551360011100769
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,8,1,power_law_1.01,0.056806397438049314
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,8,1,power_law_1.01,0.05927039980888367
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,8,1,power_law_1.01,0.06291840076446534
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,8,1,power_law_1.01,0.06661760210990905
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,8,1,power_law_1.01,0.0697920024394989
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,8,1,power_law_1.01,0.07490559816360473
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,8,1,power_law_1.01,0.07377279996871948
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,8,1,power_law_1.01,0.07896959781646729
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,8,1,power_law_1.01,0.0876800000667572
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,8,1,power_law_1.01,0.08554239869117737
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,8,1,power_law_1.01,0.09710720181465149
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,8,1,power_law_1.01,0.11208959817886352
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,8,1,power_law_1.01,0.13043839931488038
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,8,1,power_law_1.01,0.21187200546264648
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,8,1,power_law_1.01,0.2492095947265625
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,8,1,power_law_1.01,0.2282111883163452
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,8,1,power_law_1.01,0.2665663957595825
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,8,1,power_law_1.01,0.34778239727020266
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,8,1,power_law_1.01,0.4496128082275391
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,2,32,power_law_1.2,0.06500480175018311
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,2,32,power_law_1.2,0.05575680136680603
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,2,32,power_law_1.2,0.054099202156066895
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,2,32,power_law_1.2,0.05971199870109558
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,2,32,power_law_1.2,0.05983999967575073
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,2,32,power_law_1.2,0.049644801020622256
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,2,32,power_law_1.2,0.057606399059295654
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,2,32,power_law_1.2,0.056985598802566526
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,2,32,power_law_1.2,0.05867519974708557
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,2,32,power_law_1.2,0.060889601707458496
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,2,32,power_law_1.2,0.06122879981994629
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,2,32,power_law_1.2,0.06433280110359192
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,2,32,power_law_1.2,0.06507520079612732
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,2,32,power_law_1.2,0.06492159962654113
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,2,32,power_law_1.2,0.07266560196876526
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,2,32,power_law_1.2,0.07420160174369812
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,2,32,power_law_1.2,0.07565439939498901
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,2,32,power_law_1.2,0.08380159735679626
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,2,32,power_law_1.2,0.08819199800491333
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,2,32,power_law_1.2,0.10327680110931396
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,2,32,power_law_1.2,0.1112064003944397
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,2,32,power_law_1.2,0.13341439962387086
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,2,32,power_law_1.2,0.16357760429382323
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,2,32,power_law_1.2,0.20079998970031737
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,2,32,power_law_1.2,0.2678591966629028
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,2,32,power_law_1.2,0.3729664087295532
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,2,32,power_law_1.2,0.4813504219055176
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,2,16,balanced,0.021295999487241108
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,2,16,balanced,0.02125866711139679
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,2,16,balanced,0.021104000508785248
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,2,16,balanced,0.021146667500336964
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,2,16,balanced,0.025120000044504803
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,2,16,balanced,0.029333333174387615
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,2,16,balanced,0.035429333647092186
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,2,16,balanced,0.03445333242416382
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,2,16,balanced,0.04868266483147939
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,2,16,balanced,0.04641066491603851
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,2,16,balanced,0.04549333453178406
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,2,16,balanced,0.05457599957784017
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,2,16,balanced,0.05472533404827118
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,2,16,balanced,0.05386666456858317
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,2,16,balanced,0.06942399839560191
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,2,16,balanced,0.06665599842866261
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,2,16,balanced,0.0658133327960968
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,2,16,balanced,0.06630399823188782
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,2,16,balanced,0.07212799787521362
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,2,16,balanced,0.08226666847864787
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,2,16,balanced,0.09516800443331401
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,2,16,balanced,0.10921600461006165
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,2,16,balanced,0.1285920043786367
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,2,16,balanced,0.1618613302707672
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,2,16,balanced,0.19612266620000204
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,2,16,balanced,0.2945546706517537
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,2,16,balanced,0.3615413506825765
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,2,4,power_law_1.01,0.06869760155677795
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,2,4,power_law_1.01,0.08481919765472412
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,2,4,power_law_1.01,0.0876416027545929
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,2,4,power_law_1.01,0.10333440303802491
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,2,4,power_law_1.01,0.11520639657974244
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,2,4,power_law_1.01,0.11580799818038941
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,2,4,power_law_1.01,0.13052159547805786
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,2,4,power_law_1.01,0.14096640348434447
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,2,4,power_law_1.01,0.13415039777755738
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,2,4,power_law_1.01,0.1343999981880188
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,2,4,power_law_1.01,0.13939199447631836
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,2,4,power_law_1.01,0.1425920009613037
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,2,4,power_law_1.01,0.14577920436859132
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,2,4,power_law_1.01,0.14670079946517944
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,2,4,power_law_1.01,0.14999680519104003
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,2,4,power_law_1.01,0.15175679922103882
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,2,4,power_law_1.01,0.15574400424957274
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,2,4,power_law_1.01,0.17402880191802977
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,1,32,balanced,0.023189333577950794
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,1,32,balanced,0.023082666099071503
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,2,4,power_law_1.01,0.17937920093536378
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,1,32,balanced,0.023061332603295643
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,1,32,balanced,0.02143466720978419
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,1,32,balanced,0.023221333821614582
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,1,32,balanced,0.029466666281223297
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,1,32,balanced,0.03664000084002813
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,2,4,power_law_1.01,0.2084736108779907
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,1,32,balanced,0.03453866640726725
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,1,32,balanced,0.05240533252557119
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,1,32,balanced,0.052373334765434265
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,1,32,balanced,0.05146666864554087
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,1,32,balanced,0.054832001527150474
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,2,4,power_law_1.01,0.23513600826263428
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,1,32,balanced,0.05110399921735128
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,1,32,balanced,0.049957334995269775
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,1,32,balanced,0.06850666801134746
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,1,32,balanced,0.06398933132489522
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,1,32,balanced,0.05607999861240387
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,2,4,power_law_1.01,0.28942720890045165
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,1,32,balanced,0.06724266707897186
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,1,32,balanced,0.075013334552447
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,1,32,balanced,0.0879146655400594
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,1,32,balanced,0.10167466600735982
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,2,4,power_law_1.01,0.3487231969833374
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,1,32,balanced,0.12909866372744241
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,1,32,balanced,0.15899733702341715
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,1,32,balanced,0.2142933408419291
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,1,32,balanced,0.27113600571950275
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,2,4,power_law_1.01,0.4912831783294678
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,1,32,balanced,0.396778662999471
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,1,32,balanced,0.511189341545105
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,2,4,power_law_1.01,0.6123904228210449
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,2,4,power_law_1.01,0.9230719566345215
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,2,4,power_law_1.01,1.1843711853027343
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,8,32,power_law_1.01,0.017836800217628478
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,8,32,power_law_1.01,0.017836800217628478
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,8,32,power_law_1.01,0.017280000448226928
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,8,32,power_law_1.01,0.017478400468826295
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,8,32,power_law_1.01,0.018035200238227845
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,8,32,power_law_1.01,0.018777599930763243
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,8,32,power_law_1.01,0.02190079987049103
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,8,32,power_law_1.01,0.02252800017595291
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,8,32,power_law_1.01,0.03558399975299835
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,8,32,power_law_1.01,0.03580799996852875
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,8,32,power_law_1.01,0.036185601353645326
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,8,32,power_law_1.01,0.03134720027446747
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,8,32,power_law_1.01,0.03147520124912262
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,8,32,power_law_1.01,0.0315775990486145
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,8,32,power_law_1.01,0.032128000259399415
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,8,32,power_law_1.01,0.03273600041866302
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,8,32,power_law_1.01,0.03404799997806549
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,8,32,power_law_1.01,0.03598720133304596
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,8,32,power_law_1.01,0.03672960102558136
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,8,32,power_law_1.01,0.042316800355911253
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,8,32,power_law_1.01,0.04644480049610138
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,8,32,power_law_1.01,0.05431680083274841
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,8,32,power_law_1.01,0.0644864022731781
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,8,32,power_law_1.01,0.08716800212860107
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,8,32,power_law_1.01,0.1097599983215332
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,8,32,power_law_1.01,0.14734079837799072
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,8,32,power_law_1.01,0.1854464054107666
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,2,64,balanced,0.043807998299598694
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,2,64,balanced,0.04384533564249674
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,2,64,balanced,0.04363733530044556
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,2,64,balanced,0.04570133487383524
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,2,64,balanced,0.047797332207361855
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,2,64,balanced,0.04749333361784617
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,2,64,balanced,0.05023466547330221
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,2,64,balanced,0.05165866514046987
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,2,64,balanced,0.04979733129342397
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,2,64,balanced,0.053770666321118675
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,2,64,balanced,0.05189333359400431
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,2,64,balanced,0.05177066723505656
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,2,64,balanced,0.05392000079154968
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,2,64,balanced,0.05614933371543884
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,2,64,balanced,0.0637546678384145
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,2,64,balanced,0.06022400160630544
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,2,64,balanced,0.06436799963315327
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,2,64,balanced,0.06822933256626129
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,2,64,balanced,0.07443200051784515
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,2,64,balanced,0.08501332998275757
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,2,64,balanced,0.09239466985066731
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,2,64,balanced,0.11533332864443462
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,2,64,balanced,0.13023466865221658
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,2,64,balanced,0.1748746633529663
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,2,64,balanced,0.2157920002937317
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,2,64,balanced,0.29815999666849774
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,2,64,balanced,0.3768639961878459
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,8,8,power_law_1.2,0.020473599433898926
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,8,8,power_law_1.2,0.01886080056428909
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,8,8,power_law_1.2,0.019571200013160706
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,8,8,power_law_1.2,0.020160000026226043
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,8,8,power_law_1.2,0.02160000056028366
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,8,8,power_law_1.2,0.02189439982175827
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,8,8,power_law_1.2,0.022572800517082214
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,8,8,power_law_1.2,0.022419199347496033
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,8,8,power_law_1.2,0.023583999276161192
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,8,8,power_law_1.2,0.024166400730609893
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,8,8,power_law_1.2,0.024985599517822265
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,8,8,power_law_1.2,0.02565760016441345
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,8,8,power_law_1.2,0.025561600923538208
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,8,8,power_law_1.2,0.026604801416397095
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,8,8,power_law_1.2,0.02967039942741394
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,8,8,power_law_1.2,0.03304319977760315
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,8,8,power_law_1.2,0.03503359854221344
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,8,8,power_law_1.2,0.03957119882106781
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,8,8,power_law_1.2,0.0591808021068573
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,8,8,power_law_1.2,0.06940159797668458
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,8,8,power_law_1.2,0.06376320123672485
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,8,8,power_law_1.2,0.07548159956932068
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,8,8,power_law_1.2,0.08501120209693909
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,8,8,power_law_1.2,0.11119999885559081
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,8,8,power_law_1.2,0.1339967966079712
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,8,8,power_law_1.2,0.17804800271987914
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,8,8,power_law_1.2,0.2346112012863159
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,2,64,power_law_1.01,0.018156799674034118
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,2,64,power_law_1.01,0.017472000420093538
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,2,64,power_law_1.01,0.018195199966430663
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,2,64,power_law_1.01,0.01855359971523285
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,2,64,power_law_1.01,0.02069759964942932
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,2,64,power_law_1.01,0.024051199853420257
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,2,64,power_law_1.01,0.03668479919433594
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,2,64,power_law_1.01,0.03606399893760681
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,2,64,power_law_1.01,0.033318400382995605
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,2,64,power_law_1.01,0.03333759903907776
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,2,64,power_law_1.01,0.0332863986492157
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,2,64,power_law_1.01,0.03418880105018616
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,2,64,power_law_1.01,0.033478400111198424
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,2,64,power_law_1.01,0.03290880024433136
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,2,64,power_law_1.01,0.033471998572349546
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,2,64,power_law_1.01,0.03311359882354736
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,2,64,power_law_1.01,0.0346560001373291
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,2,64,power_law_1.01,0.036627200245857236
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,2,64,power_law_1.01,0.03767040073871612
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,2,64,power_law_1.01,0.044844800233840944
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,2,64,power_law_1.01,0.05107839703559876
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,2,64,power_law_1.01,0.06439039707183838
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,2,64,power_law_1.01,0.07522559762001038
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,2,64,power_law_1.01,0.0967296004295349
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,2,64,power_law_1.01,0.12011519670486451
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,2,64,power_law_1.01,0.17137279510498046
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,2,64,power_law_1.01,0.22209279537200927
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,1,8,power_law_1.2,0.056959998607635495
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,1,8,power_law_1.2,0.07363839745521546
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,1,8,power_law_1.2,0.07613440155982971
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,1,8,power_law_1.2,0.08880000114440918
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,1,8,power_law_1.2,0.09633920192718506
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,1,8,power_law_1.2,0.1032256007194519
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,1,8,power_law_1.2,0.10035200119018554
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,1,8,power_law_1.2,0.10229120254516602
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,1,8,power_law_1.2,0.10146559476852417
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,1,8,power_law_1.2,0.09900799989700318
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,1,8,power_law_1.2,0.10629119873046874
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,1,8,power_law_1.2,0.10781439542770385
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,1,8,power_law_1.2,0.11066880226135253
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,1,8,power_law_1.2,0.11492480039596557
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,1,8,power_law_1.2,0.12263679504394531
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,1,8,power_law_1.2,0.12659840583801268
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,1,8,power_law_1.2,0.13185280561447144
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,1,8,power_law_1.2,0.14965120553970337
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,1,8,power_law_1.2,0.15473920106887817
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,1,8,power_law_1.2,0.1788800001144409
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,1,8,power_law_1.2,0.1987391948699951
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,1,8,power_law_1.2,0.2572416067123413
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,1,8,power_law_1.2,0.2927232027053833
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,1,8,power_law_1.2,0.4193984031677246
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,1,8,power_law_1.2,0.4856448173522949
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,1,8,power_law_1.2,0.7156544208526612
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,1,8,power_law_1.2,0.8426367759704589
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,1,64,power_law_1.2,0.05627520084381103
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,1,64,power_law_1.2,0.04433920085430145
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,1,64,power_law_1.2,0.04361599981784821
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,1,64,power_law_1.2,0.047539201378822324
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,1,64,power_law_1.2,0.04533120095729828
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,1,64,power_law_1.2,0.045311999320983884
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,1,64,power_law_1.2,0.04578559994697571
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,1,64,power_law_1.2,0.04749439954757691
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,1,64,power_law_1.2,0.04714879989624023
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,1,64,power_law_1.2,0.048403200507164
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,1,64,power_law_1.2,0.049702399969100954
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,1,64,power_law_1.2,0.05114240050315857
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,1,64,power_law_1.2,0.054841601848602296
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,1,64,power_law_1.2,0.057811200618743896
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,1,64,power_law_1.2,0.06188160181045532
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,1,64,power_law_1.2,0.06293119788169861
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,1,64,power_law_1.2,0.0648959994316101
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,1,64,power_law_1.2,0.0788927972316742
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,1,64,power_law_1.2,0.08126720190048217
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,1,64,power_law_1.2,0.10183039903640748
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,1,64,power_law_1.2,0.11355520486831665
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,1,64,power_law_1.2,0.1469696044921875
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,1,64,power_law_1.2,0.1927616000175476
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,1,64,power_law_1.2,0.2375488042831421
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,1,64,power_law_1.2,0.3224832057952881
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,1,64,power_law_1.2,0.44992637634277344
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,1,64,power_law_1.2,0.579091215133667
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,1,1,power_law_1.2,0.1064255952835083
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,1,1,power_law_1.2,0.13898240327835082
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,1,1,power_law_1.2,0.19086719751358033
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,1,1,power_law_1.2,0.25464320182800293
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,1,1,power_law_1.2,0.31030399799346925
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,1,1,power_law_1.2,0.3776576042175293
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,1,1,power_law_1.2,0.4973887920379639
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,1,1,power_law_1.2,0.535859203338623
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,1,1,power_law_1.2,0.5568575859069824
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,1,1,power_law_1.2,0.5618303775787353
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,1,1,power_law_1.2,0.5828991889953613
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,1,1,power_law_1.2,0.6009856224060058
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,1,1,power_law_1.2,0.6052544116973877
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,1,1,power_law_1.2,0.6178175926208496
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,1,1,power_law_1.2,0.6574656009674072
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,1,1,power_law_1.2,0.6453504085540771
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,1,1,power_law_1.2,0.6841919898986817
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,1,1,power_law_1.2,0.734284782409668
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,1,1,power_law_1.2,0.7855679988861084
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,1,1,power_law_1.2,0.9029888153076172
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,1,1,power_law_1.2,1.0029120445251465
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,1,1,power_law_1.2,1.193894386291504
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,1,1,power_law_1.2,1.377894401550293
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,1,1,power_law_1.2,1.784351921081543
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,4,16,balanced,0.04385599990685781
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,4,16,balanced,0.04403733213742574
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,4,16,balanced,0.04437333345413208
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,4,16,balanced,0.047983999053637184
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,1,1,power_law_1.2,2.160544013977051
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,4,16,balanced,0.050240000089009605
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,4,16,balanced,0.051882664362589516
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,4,16,balanced,0.05399466554323832
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,4,16,balanced,0.05668266614278158
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,4,16,balanced,0.05414933462937673
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,4,16,balanced,0.05607999861240387
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,4,16,balanced,0.056405335664749146
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,4,16,balanced,0.05791999896367391
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,4,16,balanced,0.05845333139101664
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,4,16,balanced,0.060133333007494606
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,4,16,balanced,0.0705386648575465
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,4,16,balanced,0.0724426656961441
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,4,16,balanced,0.07563733557860057
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,4,16,balanced,0.07667733232180278
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,4,16,balanced,0.08490133285522461
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,4,16,balanced,0.09698133667310078
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,4,16,balanced,0.1083573301633199
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,4,16,balanced,0.13553067048390707
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,4,16,balanced,0.1562933325767517
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,4,16,balanced,0.20468266805013022
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,4,16,balanced,0.25034133593241376
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,1,1,power_law_1.2,2.9338623046875
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,4,16,balanced,0.35170666376749676
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,4,16,balanced,0.44973333676656085
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,1,1,power_law_1.2,3.664089584350586
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,8,4,balanced,0.019354666272799175
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,8,4,balanced,0.01940800001223882
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,8,4,balanced,0.020986666282018025
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,8,4,balanced,0.023013333479563396
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,8,4,balanced,0.025237334271272022
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,8,4,balanced,0.02926933268706004
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,8,4,balanced,0.029301332930723827
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,8,4,balanced,0.031311998764673867
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,8,4,balanced,0.03526933242877325
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,8,4,balanced,0.03835200021664301
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,8,4,balanced,0.037690666814645134
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,8,4,balanced,0.041519999504089355
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,8,4,balanced,0.04011200120051702
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,8,4,balanced,0.04274666806062063
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,8,4,balanced,0.058058664202690125
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,8,4,balanced,0.0580266664425532
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,8,4,balanced,0.061194668213526406
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,8,4,balanced,0.0763626645008723
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,8,4,balanced,0.081194669008255
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,8,4,balanced,0.10421866178512573
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,8,4,balanced,0.11181867122650146
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,8,4,balanced,0.1225386659304301
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,8,4,balanced,0.13674666484196982
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,8,4,balanced,0.15784533818562826
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,8,4,balanced,0.17500799894332886
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,8,4,balanced,0.2804639935493469
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,8,4,balanced,0.3136213421821594
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,2,4,power_law_1.2,0.021670399606227873
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,2,4,power_law_1.2,0.02456959933042526
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,2,4,power_law_1.2,0.026630398631095887
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,2,4,power_law_1.2,0.03482879996299744
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,2,4,power_law_1.2,0.03963519930839539
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,2,4,power_law_1.2,0.03827199935913086
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,2,4,power_law_1.2,0.04992640018463135
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,2,4,power_law_1.2,0.04919680058956146
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,2,4,power_law_1.2,0.05103999972343445
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,2,4,power_law_1.2,0.0537280023097992
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,2,4,power_law_1.2,0.05413119792938233
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,2,4,power_law_1.2,0.05630080103874206
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,2,4,power_law_1.2,0.05852800011634827
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,2,4,power_law_1.2,0.057811200618743896
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,2,4,power_law_1.2,0.05868800282478333
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,2,4,power_law_1.2,0.06291840076446534
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,2,4,power_law_1.2,0.0751039981842041
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,2,4,power_law_1.2,0.08365439772605895
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,2,4,power_law_1.2,0.09605119824409485
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,2,4,power_law_1.2,0.12042880058288574
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,2,4,power_law_1.2,0.12353919744491577
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,2,4,power_law_1.2,0.13789440393447877
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,2,4,power_law_1.2,0.148307204246521
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,2,4,power_law_1.2,0.18275200128555297
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,2,4,power_law_1.2,0.23267838954925538
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,2,4,power_law_1.2,0.30547199249267576
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,2,4,power_law_1.2,0.36082561016082765
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,2,4,power_law_1.01,0.04271360039710999
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,2,4,power_law_1.01,0.05544319748878479
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,2,4,power_law_1.01,0.06090880036354065
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,2,4,power_law_1.01,0.07454079985618592
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,2,4,power_law_1.01,0.07943040132522583
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,2,4,power_law_1.01,0.08517760038375854
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,2,4,power_law_1.01,0.1021183967590332
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,2,4,power_law_1.01,0.10443520545959473
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,2,4,power_law_1.01,0.11144959926605225
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,2,4,power_law_1.01,0.10647679567337036
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,2,4,power_law_1.01,0.1061568021774292
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,2,4,power_law_1.01,0.11030399799346924
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,2,4,power_law_1.01,0.11107200384140015
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,2,4,power_law_1.01,0.11751680374145508
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,2,4,power_law_1.01,0.1250175952911377
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,2,4,power_law_1.01,0.1245311975479126
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,2,4,power_law_1.01,0.1374079942703247
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,2,4,power_law_1.01,0.15276800394058226
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,2,4,power_law_1.01,0.16324479579925538
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,2,4,power_law_1.01,0.19467519521713256
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,2,4,power_law_1.01,0.2130944013595581
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,2,4,power_law_1.01,0.26974079608917234
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,2,4,power_law_1.01,0.32705280780792234
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,2,4,power_law_1.01,0.4255551815032959
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,2,4,power_law_1.01,0.5228799819946289
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,2,4,power_law_1.01,0.7597824096679687
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,2,4,power_law_1.01,0.9481216430664062
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,1,2,balanced,0.05494933327039083
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,1,2,balanced,0.07275199890136719
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,1,2,balanced,0.10481066505114238
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,1,2,balanced,0.1706399917602539
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,1,2,balanced,0.29286932945251465
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,1,2,balanced,0.539413332939148
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,1,2,balanced,0.5401120185852051
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,1,2,balanced,0.5398186842600504
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,1,2,balanced,0.5426506598790487
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,1,2,balanced,0.5427413384119669
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,1,2,balanced,0.5451573530832926
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,1,2,balanced,0.5491413275400797
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,1,2,balanced,0.5519040028254191
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,1,2,balanced,0.5549493233362833
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,1,2,balanced,0.56168532371521
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,1,2,balanced,0.5634346803029379
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,1,2,balanced,0.5702879826227824
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,1,2,balanced,0.5877013206481934
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,1,2,balanced,0.6026080052057902
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,1,2,balanced,0.6298240025838217
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,1,2,balanced,0.6758399804433187
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,1,2,balanced,0.7342080275217692
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,1,2,balanced,0.774405320485433
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,1,2,balanced,0.9685173034667969
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,1,2,balanced,1.0285173257191975
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,1,2,balanced,1.4293120702107747
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,1,2,balanced,1.60097074508667
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,1,4,balanced,0.07855466504891713
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,1,4,balanced,0.08073066671689351
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,1,4,balanced,0.0888266662756602
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,1,4,balanced,0.11082133650779724
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,1,4,balanced,0.14549332857131958
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,1,4,balanced,0.21453332901000977
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,1,4,balanced,0.21684799591700235
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,1,4,balanced,0.21846399704615274
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,1,4,balanced,0.21609065930048624
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,1,4,balanced,0.21970132986704508
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,1,4,balanced,0.21984533468882242
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,1,4,balanced,0.22247999906539917
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,1,4,balanced,0.22607467571894327
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,1,4,balanced,0.2264853318532308
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,1,4,balanced,0.23089067141215006
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,1,4,balanced,0.23272534211476645
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,1,4,balanced,0.23548799753189087
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,1,4,balanced,0.2434719999631246
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,1,4,balanced,0.25421865781148273
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,1,4,balanced,0.2771786650021871
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,1,4,balanced,0.29260800282160443
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,1,4,balanced,0.331002672513326
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,1,4,balanced,0.3720426559448242
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,1,4,balanced,0.5188106695810953
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,1,4,balanced,0.6080960035324097
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,1,4,balanced,0.8579306602478027
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,1,4,balanced,1.120959997177124
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,1,2,balanced,0.04797333478927612
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,1,2,balanced,0.05608533322811127
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,1,2,balanced,0.07630933324495952
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,1,2,balanced,0.11115200320879619
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,1,2,balanced,0.17629865805308023
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,1,2,balanced,0.29684267441431683
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,1,2,balanced,0.298799991607666
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,1,2,balanced,0.30089600880940753
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,1,2,balanced,0.3024959961573283
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,1,2,balanced,0.3030346632003784
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,1,2,balanced,0.3065706690152486
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,1,2,balanced,0.31065066655476886
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,1,2,balanced,0.3113279938697815
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,1,2,balanced,0.31866133213043213
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,1,2,balanced,0.32387200991312665
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,1,2,balanced,0.3278026580810547
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,1,2,balanced,0.3358773390452067
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,1,2,balanced,0.35523732503255206
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,1,2,balanced,0.370250662167867
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,1,2,balanced,0.4038879871368408
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,1,2,balanced,0.44762134552001953
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,1,2,balanced,0.5210346778233846
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,1,2,balanced,0.5790826479593912
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,1,2,balanced,0.761141300201416
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,1,2,balanced,0.8644106388092041
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,1,2,balanced,1.2481280167897542
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,1,2,balanced,1.494607925415039
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,1,32,power_law_1.01,0.056492799520492555
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,1,32,power_law_1.01,0.056467199325561525
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,1,32,power_law_1.01,0.0551360011100769
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,1,32,power_law_1.01,0.05994240045547485
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,1,32,power_law_1.01,0.058195197582244874
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,1,32,power_law_1.01,0.053273600339889524
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,1,32,power_law_1.01,0.05663359761238098
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,1,32,power_law_1.01,0.06133120059967041
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,1,32,power_law_1.01,0.05379199981689453
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,1,32,power_law_1.01,0.0565887987613678
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,1,32,power_law_1.01,0.063673597574234
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,1,32,power_law_1.01,0.06232960224151611
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,1,32,power_law_1.01,0.0626688003540039
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,1,32,power_law_1.01,0.06717439889907836
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,1,32,power_law_1.01,0.06997119784355163
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,1,32,power_law_1.01,0.073990398645401
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,1,32,power_law_1.01,0.07972480058670044
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,1,32,power_law_1.01,0.08994560241699219
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,1,32,power_law_1.01,0.09189119935035706
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,1,32,power_law_1.01,0.1081279993057251
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,1,32,power_law_1.01,0.12588800191879274
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,1,32,power_law_1.01,0.15111039876937865
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,1,32,power_law_1.01,0.1844032049179077
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,1,32,power_law_1.01,0.24776959419250488
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,1,32,power_law_1.01,0.2991744041442871
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,1,32,power_law_1.01,0.41328001022338867
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,1,32,power_law_1.01,0.5213247776031494
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,2,32,power_law_1.2,0.047686401009559634
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,2,32,power_law_1.2,0.04627200067043304
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,2,32,power_law_1.2,0.04568960070610047
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,2,32,power_law_1.2,0.046515199542045596
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,2,32,power_law_1.2,0.044038400053977966
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,2,32,power_law_1.2,0.045388799905776975
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,2,32,power_law_1.2,0.04609279930591583
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,2,32,power_law_1.2,0.047417598962783816
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,2,32,power_law_1.2,0.04742400050163269
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,2,32,power_law_1.2,0.04862079918384552
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,2,32,power_law_1.2,0.0495743989944458
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,2,32,power_law_1.2,0.05095040202140808
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,2,32,power_law_1.2,0.053420799970626834
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,2,32,power_law_1.2,0.055270397663116456
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,2,32,power_law_1.2,0.06128000020980835
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,2,32,power_law_1.2,0.06223999857902527
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,2,32,power_law_1.2,0.06689280271530151
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,2,32,power_law_1.2,0.07614719867706299
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,2,32,power_law_1.2,0.08098559975624084
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,2,32,power_law_1.2,0.10124800205230713
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,2,32,power_law_1.2,0.11580159664154052
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,2,32,power_law_1.2,0.15073920488357545
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,2,32,power_law_1.2,0.18169599771499634
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,2,32,power_law_1.2,0.23625600337982178
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,2,32,power_law_1.2,0.3019520044326782
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,2,32,power_law_1.2,0.44033279418945315
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,2,32,power_law_1.2,0.5524735927581788
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,2,1,power_law_1.2,0.020377600193023683
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,2,1,power_law_1.2,0.024358400702476503
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,2,1,power_law_1.2,0.0364544004201889
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,2,1,power_law_1.2,0.05041279792785645
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,2,1,power_law_1.2,0.06417279839515685
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,2,1,power_law_1.2,0.08513919711112976
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,2,1,power_law_1.2,0.10930559635162354
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,2,1,power_law_1.2,0.11579519510269165
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,2,1,power_law_1.2,0.11897599697113037
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,2,1,power_law_1.2,0.12348799705505371
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,2,1,power_law_1.2,0.12914559841156006
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,2,1,power_law_1.2,0.13326719999313355
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,2,1,power_law_1.2,0.13259520530700683
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,2,1,power_law_1.2,0.14375040531158448
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,2,1,power_law_1.2,0.14839680194854737
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,1,64,balanced,0.045994664231936135
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,1,64,balanced,0.04601066807905833
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,1,64,balanced,0.04375466704368591
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,1,64,balanced,0.046053335070610046
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,1,64,balanced,0.04987733562787374
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,1,64,balanced,0.04985600213209788
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,1,64,balanced,0.05148266752560934
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,1,64,balanced,0.05213866631189982
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,1,64,balanced,0.051957334081331887
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,1,64,balanced,0.05197866757710775
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,1,64,balanced,0.05285866558551788
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,1,64,balanced,0.05406400064627329
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,1,64,balanced,0.051882664362589516
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,1,64,balanced,0.05594133337338766
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,1,64,balanced,0.057818666100502014
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,1,64,balanced,0.058245331048965454
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,1,64,balanced,0.06205866734186808
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,1,64,balanced,0.06634666522343953
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,1,64,balanced,0.06863466898600261
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,1,64,balanced,0.07653333246707916
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,2,1,power_law_1.2,0.1562880039215088
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,1,64,balanced,0.08470400174458821
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,1,64,balanced,0.1032426655292511
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,2,1,power_law_1.2,0.1662976026535034
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,1,64,balanced,0.11759466926256816
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,1,64,balanced,0.16131200393040976
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,1,64,balanced,0.1890559991200765
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,2,1,power_law_1.2,0.19347840547561646
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,1,64,balanced,0.2585493326187134
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,1,64,balanced,0.31722132364908856
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,2,1,power_law_1.2,0.19195519685745238
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,2,1,power_law_1.2,0.2414016008377075
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,2,1,power_law_1.2,0.24909439086914062
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,2,1,power_law_1.2,0.32768640518188474
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,2,1,power_law_1.2,0.3047679901123047
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,2,1,power_law_1.2,0.4141695976257324
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,2,1,power_law_1.2,0.5037951946258545
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,2,1,power_law_1.2,0.7075007915496826
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,2,1,power_law_1.2,0.8851840019226074
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,8,32,balanced,0.01916266605257988
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,8,32,balanced,0.019333332777023315
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,8,32,balanced,0.019440000255902607
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,8,32,balanced,0.01932266727089882
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,8,32,balanced,0.019280000279347103
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,8,32,balanced,0.0198186660806338
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,8,32,balanced,0.0232640008131663
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,8,32,balanced,0.02332799881696701
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,8,32,balanced,0.0374293327331543
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,8,32,balanced,0.03772266705830892
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,8,32,balanced,0.03760000069936117
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,8,32,balanced,0.034976000587145485
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,8,32,balanced,0.03366933266321818
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,8,32,balanced,0.033514666060606636
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,8,32,balanced,0.03739733248949051
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,8,32,balanced,0.03807466725508372
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,8,32,balanced,0.0374293327331543
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,8,32,balanced,0.04206933577855428
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,8,32,balanced,0.04358933369318644
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,8,32,balanced,0.049098665515581764
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,8,32,balanced,0.05677866439024607
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,8,32,balanced,0.07457066575686137
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,8,32,balanced,0.0879253347714742
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,8,32,balanced,0.11533866326014201
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,8,32,balanced,0.14412267009417215
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,8,32,balanced,0.20291733741760254
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,8,32,balanced,0.2600373427073161
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,16,16,power_law_1.2,0.04219520092010498
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,16,16,power_law_1.2,0.046828800439834596
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,16,16,power_law_1.2,0.04376960098743439
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,16,16,power_law_1.2,0.04561919867992401
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,16,16,power_law_1.2,0.045977601408958436
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,16,16,power_law_1.2,0.04611839950084686
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,16,16,power_law_1.2,0.0466623991727829
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,16,16,power_law_1.2,0.047788798809051514
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,16,16,power_law_1.2,0.04775039851665497
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,16,16,power_law_1.2,0.048198398947715757
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,16,16,power_law_1.2,0.04955520033836365
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,16,16,power_law_1.2,0.05066879987716675
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,16,16,power_law_1.2,0.05161600112915039
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,16,16,power_law_1.2,0.0541375994682312
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,16,16,power_law_1.2,0.058406400680541995
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,16,16,power_law_1.2,0.06151679754257202
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,16,16,power_law_1.2,0.06261119842529297
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,16,16,power_law_1.2,0.07029119729995728
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,16,16,power_law_1.2,0.07516800165176392
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,16,16,power_law_1.2,0.08992639780044556
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,16,16,power_law_1.2,0.10183039903640748
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,16,16,power_law_1.2,0.1372159957885742
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,16,16,power_law_1.2,0.16508159637451172
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,16,16,power_law_1.2,0.2442944049835205
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,16,16,power_law_1.2,0.29395198822021484
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,16,16,power_law_1.2,0.4503039836883545
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,16,16,power_law_1.2,0.5777728080749511
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,1,128,balanced,0.019487999379634857
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,1,128,balanced,0.021269333859284718
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,1,128,balanced,0.021269333859284718
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,1,128,balanced,0.02146666745344798
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,1,128,balanced,0.02516799916823705
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,1,128,balanced,0.03756800045569738
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,1,128,balanced,0.03521066655715307
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,1,128,balanced,0.0352906659245491
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,1,128,balanced,0.03411199897527695
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,1,128,balanced,0.03445333242416382
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,1,128,balanced,0.03523733218510946
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,1,128,balanced,0.03551999976237615
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,1,128,balanced,0.03316266586383184
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,1,128,balanced,0.031354665756225586
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,1,128,balanced,0.03826666623353958
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,1,128,balanced,0.03994133323431015
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,1,128,balanced,0.035088000198205314
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,1,128,balanced,0.03748800108830134
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,1,128,balanced,0.04625066618124644
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,1,128,balanced,0.049882665276527405
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,1,128,balanced,0.061119998494784035
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,1,128,balanced,0.08071466783682506
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,1,128,balanced,0.10046399633089702
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,1,128,balanced,0.1360160013039907
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,1,128,balanced,0.17218132813771567
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,1,128,balanced,0.2460319995880127
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,1,128,balanced,0.3209226727485657
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,1,4,power_law_1.01,0.08387200236320495
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,1,4,power_law_1.01,0.10743039846420288
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,1,4,power_law_1.01,0.1152575969696045
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,1,4,power_law_1.01,0.13917440176010132
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,1,4,power_law_1.01,0.15340160131454467
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,1,4,power_law_1.01,0.16521600484848023
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,1,4,power_law_1.01,0.19407999515533447
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,1,4,power_law_1.01,0.19969919919967652
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,1,4,power_law_1.01,0.19111039638519287
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,1,4,power_law_1.01,0.19774719476699829
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,1,4,power_law_1.01,0.21027839183807373
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,1,4,power_law_1.01,0.20449280738830566
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,1,4,power_law_1.01,0.2107072114944458
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,1,4,power_law_1.01,0.2144063949584961
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,1,4,power_law_1.01,0.22082560062408446
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,1,4,power_law_1.01,0.2196352005004883
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,1,4,power_law_1.01,0.22376320362091065
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,1,4,power_law_1.01,0.2308351993560791
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,1,4,power_law_1.01,0.25467519760131835
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,1,4,power_law_1.01,0.2877887964248657
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,1,4,power_law_1.01,0.31734399795532225
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,1,4,power_law_1.01,0.3884351968765259
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,1,4,power_law_1.01,0.4739327907562256
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,1,4,power_law_1.01,0.612275218963623
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,1,4,power_law_1.01,0.7626304149627685
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,1,4,power_law_1.01,1.0936575889587403
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,1,4,power_law_1.01,1.4044672012329102
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,4,32,power_law_1.01,0.04531840085983276
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,4,32,power_law_1.01,0.04188799858093262
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,4,32,power_law_1.01,0.04115839898586273
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,4,32,power_law_1.01,0.039263999462127684
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,4,32,power_law_1.01,0.04462080001831055
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,4,32,power_law_1.01,0.04483200013637543
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,4,32,power_law_1.01,0.04425599873065948
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,4,32,power_law_1.01,0.04681600034236908
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,4,32,power_law_1.01,0.04597119987010956
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,4,32,power_law_1.01,0.04620159864425659
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,4,32,power_law_1.01,0.04891520142555237
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,4,32,power_law_1.01,0.04954879879951477
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,4,32,power_law_1.01,0.05240960121154785
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,4,32,power_law_1.01,0.05608959794044495
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,4,32,power_law_1.01,0.06136959791183472
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,4,32,power_law_1.01,0.06069759726524353
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,4,32,power_law_1.01,0.05868160128593445
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,4,32,power_law_1.01,0.06632320284843445
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,4,32,power_law_1.01,0.0704255998134613
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,4,32,power_law_1.01,0.08421760201454162
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,4,32,power_law_1.01,0.09242879748344421
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,4,32,power_law_1.01,0.11424640417099
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,4,32,power_law_1.01,0.13020800352096557
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,4,32,power_law_1.01,0.17974400520324707
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,4,32,power_law_1.01,0.22071681022644044
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,4,32,power_law_1.01,0.2846208095550537
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,4,32,power_law_1.01,0.3803136110305786
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,2,2,power_law_1.2,0.049235200881958006
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,2,2,power_law_1.2,0.05797759890556335
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,2,2,power_law_1.2,0.06506239771842956
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,2,2,power_law_1.2,0.08438400030136109
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,2,2,power_law_1.2,0.10693119764328003
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,2,2,power_law_1.2,0.12025599479675293
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,2,2,power_law_1.2,0.14789119958877564
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,2,2,power_law_1.2,0.1565888047218323
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,2,2,power_law_1.2,0.1601408004760742
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,2,2,power_law_1.2,0.16005120277404786
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,2,2,power_law_1.2,0.16331520080566406
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,2,2,power_law_1.2,0.17139840126037598
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,2,2,power_law_1.2,0.17469439506530762
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,2,2,power_law_1.2,0.1792448043823242
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,2,2,power_law_1.2,0.18870400190353392
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,2,2,power_law_1.2,0.19519360065460206
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,2,2,power_law_1.2,0.20831360816955566
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,2,2,power_law_1.2,0.23771519660949708
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,2,2,power_law_1.2,0.25588479042053225
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,2,2,power_law_1.2,0.3042304039001465
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,2,2,power_law_1.2,0.32466559410095214
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,2,2,power_law_1.2,0.42855038642883303
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,2,2,power_law_1.2,0.4957183837890625
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,2,2,power_law_1.2,0.70830078125
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,2,2,power_law_1.2,0.8621376037597657
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,2,2,power_law_1.2,1.1666560173034668
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,2,2,power_law_1.2,1.448812770843506
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1,3072,1536,8,256,2,4,power_law_1.01,0.021657599508762358
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2,3072,1536,8,256,2,4,power_law_1.01,0.025113600492477416
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4,3072,1536,8,256,2,4,power_law_1.01,0.031046399474143983
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8,3072,1536,8,256,2,4,power_law_1.01,0.03627519905567169
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16,3072,1536,8,256,2,4,power_law_1.01,0.04292480051517487
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,32,3072,1536,8,256,2,4,power_law_1.01,0.04201599955558777
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,48,3072,1536,8,256,2,4,power_law_1.01,0.04909439980983734
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,64,3072,1536,8,256,2,4,power_law_1.01,0.05286399722099304
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,80,3072,1536,8,256,2,4,power_law_1.01,0.05422080159187317
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,96,3072,1536,8,256,2,4,power_law_1.01,0.05446400046348572
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,128,3072,1536,8,256,2,4,power_law_1.01,0.05548800230026245
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,160,3072,1536,8,256,2,4,power_law_1.01,0.055302399396896365
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,192,3072,1536,8,256,2,4,power_law_1.01,0.06046720147132874
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,256,3072,1536,8,256,2,4,power_law_1.01,0.05994880199432373
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,320,3072,1536,8,256,2,4,power_law_1.01,0.06094080209732056
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,384,3072,1536,8,256,2,4,power_law_1.01,0.06166399717330932
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,512,3072,1536,8,256,2,4,power_law_1.01,0.07596799731254578
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,768,3072,1536,8,256,2,4,power_law_1.01,0.08092799782752991
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1024,3072,1536,8,256,2,4,power_law_1.01,0.09077759981155395
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,1536,3072,1536,8,256,2,4,power_law_1.01,0.11226240396499634
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,2048,3072,1536,8,256,2,4,power_law_1.01,0.11642240285873413
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,3072,3072,1536,8,256,2,4,power_law_1.01,0.1278272032737732
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,4096,3072,1536,8,256,2,4,power_law_1.01,0.14402559995651246
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,6144,3072,1536,8,256,2,4,power_law_1.01,0.17742719650268554
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,8192,3072,1536,8,256,2,4,power_law_1.01,0.21103999614715577
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,12288,3072,1536,8,256,2,4,power_law_1.01,0.2919167995452881
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_min_latency,nvfp4,16384,3072,1536,8,256,2,4,power_law_1.01,0.35272960662841796
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1,3072,1536,8,256,4,2,power_law_1.01,0.04650880098342895
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2,3072,1536,8,256,4,2,power_law_1.01,0.05775359869003296
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4,3072,1536,8,256,4,2,power_law_1.01,0.06734079718589783
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8,3072,1536,8,256,4,2,power_law_1.01,0.08744320273399353
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16,3072,1536,8,256,4,2,power_law_1.01,0.10753920078277587
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,32,3072,1536,8,256,4,2,power_law_1.01,0.12323199510574341
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,48,3072,1536,8,256,4,2,power_law_1.01,0.14956799745559693
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,64,3072,1536,8,256,4,2,power_law_1.01,0.15575040578842164
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,80,3072,1536,8,256,4,2,power_law_1.01,0.1519487977027893
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,96,3072,1536,8,256,4,2,power_law_1.01,0.15765119791030885
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,128,3072,1536,8,256,4,2,power_law_1.01,0.1587007999420166
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,160,3072,1536,8,256,4,2,power_law_1.01,0.1622912049293518
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,192,3072,1536,8,256,4,2,power_law_1.01,0.1671679973602295
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,256,3072,1536,8,256,4,2,power_law_1.01,0.1719871997833252
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,320,3072,1536,8,256,4,2,power_law_1.01,0.1786239981651306
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,384,3072,1536,8,256,4,2,power_law_1.01,0.1832128047943115
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,512,3072,1536,8,256,4,2,power_law_1.01,0.18993279933929444
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,768,3072,1536,8,256,4,2,power_law_1.01,0.20707199573516846
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1024,3072,1536,8,256,4,2,power_law_1.01,0.22442879676818847
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,1536,3072,1536,8,256,4,2,power_law_1.01,0.265395188331604
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,2048,3072,1536,8,256,4,2,power_law_1.01,0.28383359909057615
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,3072,3072,1536,8,256,4,2,power_law_1.01,0.36715519428253174
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,4096,3072,1536,8,256,4,2,power_law_1.01,0.4268223762512207
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,6144,3072,1536,8,256,4,2,power_law_1.01,0.5349440097808837
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,8192,3072,1536,8,256,4,2,power_law_1.01,0.6415743827819824
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,12288,3072,1536,8,256,4,2,power_law_1.01,0.8905088424682617
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,float16,16384,3072,1536,8,256,4,2,power_law_1.01,1.1016575813293457
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1,3072,1536,8,256,8,1,power_law_1.01,0.047679999470710756
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2,3072,1536,8,256,8,1,power_law_1.01,0.05004799962043762
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4,3072,1536,8,256,8,1,power_law_1.01,0.056409597396850586
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8,3072,1536,8,256,8,1,power_law_1.01,0.07210239768028259
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16,3072,1536,8,256,8,1,power_law_1.01,0.08453119993209839
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,32,3072,1536,8,256,8,1,power_law_1.01,0.10016000270843506
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,48,3072,1536,8,256,8,1,power_law_1.01,0.11767040491104126
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,64,3072,1536,8,256,8,1,power_law_1.01,0.12228480577468873
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,80,3072,1536,8,256,8,1,power_law_1.01,0.12595839500427247
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,96,3072,1536,8,256,8,1,power_law_1.01,0.12924799919128419
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,128,3072,1536,8,256,8,1,power_law_1.01,0.1344063997268677
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,160,3072,1536,8,256,8,1,power_law_1.01,0.14174720048904418
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,192,3072,1536,8,256,8,1,power_law_1.01,0.14831360578536987
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,256,3072,1536,8,256,8,1,power_law_1.01,0.15922559499740602
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,320,3072,1536,8,256,8,1,power_law_1.01,0.17059199810028075
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,384,3072,1536,8,256,8,1,power_law_1.01,0.18019200563430787
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,512,3072,1536,8,256,8,1,power_law_1.01,0.17804800271987914
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,768,3072,1536,8,256,8,1,power_law_1.01,0.21235198974609376
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1024,3072,1536,8,256,8,1,power_law_1.01,0.2398848056793213
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,1536,3072,1536,8,256,8,1,power_law_1.01,0.2985856056213379
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,2048,3072,1536,8,256,8,1,power_law_1.01,0.3570751905441284
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,3072,3072,1536,8,256,8,1,power_law_1.01,0.4702911853790283
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,4096,3072,1536,8,256,8,1,power_law_1.01,0.5726975917816162
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,6144,3072,1536,8,256,8,1,power_law_1.01,0.7980224132537842
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,8192,3072,1536,8,256,8,1,power_law_1.01,1.0196672439575196
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,12288,3072,1536,8,256,8,1,power_law_1.01,1.4614527702331543
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,fp8,16384,3072,1536,8,256,8,1,power_law_1.01,1.9034175872802734
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,4,32,power_law_1.01,0.04535680115222931
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,4,32,power_law_1.01,0.04408960044384003
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,4,32,power_law_1.01,0.042412799596786496
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,4,32,power_law_1.01,0.04612480103969574
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,4,32,power_law_1.01,0.046419200301170346
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,4,32,power_law_1.01,0.04752640128135681
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,4,32,power_law_1.01,0.048019200563430786
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,4,32,power_law_1.01,0.049395200610160825
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,4,32,power_law_1.01,0.05000960230827332
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,4,32,power_law_1.01,0.04896639883518219
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,4,32,power_law_1.01,0.05000960230827332
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,4,32,power_law_1.01,0.05172479748725891
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,4,32,power_law_1.01,0.051737600564956666
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,4,32,power_law_1.01,0.05297279953956604
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,4,32,power_law_1.01,0.05777279734611511
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,4,32,power_law_1.01,0.0592960000038147
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,4,32,power_law_1.01,0.0629696011543274
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,4,32,power_law_1.01,0.0702015995979309
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,4,32,power_law_1.01,0.07783679962158203
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,4,32,power_law_1.01,0.09631999731063842
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,4,32,power_law_1.01,0.10769920349121094
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,4,32,power_law_1.01,0.14250240325927735
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,4,32,power_law_1.01,0.166867196559906
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,4,32,power_law_1.01,0.22671360969543458
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,4,32,power_law_1.01,0.28883841037750246
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,4,32,power_law_1.01,0.41434240341186523
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,4,32,power_law_1.01,0.5239232063293457
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,1,1,power_law_1.2,0.15699199438095093
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,1,1,power_law_1.2,0.18335360288619995
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,1,1,power_law_1.2,0.2080319881439209
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,1,1,power_law_1.2,0.2680896043777466
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,1,1,power_law_1.2,0.3360640048980713
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,1,1,power_law_1.2,0.3956415891647339
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,1,1,power_law_1.2,0.5442944049835206
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,1,1,power_law_1.2,0.5631296157836914
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,1,1,power_law_1.2,0.5798463821411133
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,1,1,power_law_1.2,0.601529598236084
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,1,1,power_law_1.2,0.6078400135040283
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,1,1,power_law_1.2,0.6406015872955322
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,1,1,power_law_1.2,0.6431615829467774
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,1,1,power_law_1.2,0.6722752094268799
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,1,1,power_law_1.2,0.6786816120147705
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,1,1,power_law_1.2,0.7026432037353516
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,1,1,power_law_1.2,0.7159167766571045
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,1,1,power_law_1.2,0.7772736072540283
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,1,1,power_law_1.2,0.7936384201049804
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,1,1,power_law_1.2,0.8909248352050781
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,1,1,power_law_1.2,0.9788800239562988
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,1,1,power_law_1.2,1.14715518951416
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,1,1,power_law_1.2,1.3386303901672363
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,1,1,power_law_1.2,1.754470443725586
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,1,1,power_law_1.2,2.164729690551758
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,1,1,power_law_1.2,2.9662208557128906
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,1,1,power_law_1.2,3.7467582702636717
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1,3072,1536,8,256,1,4,power_law_1.2,0.0862335979938507
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2,3072,1536,8,256,1,4,power_law_1.2,0.10751359462738037
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4,3072,1536,8,256,1,4,power_law_1.2,0.11308159828186035
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8,3072,1536,8,256,1,4,power_law_1.2,0.14371199607849122
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16,3072,1536,8,256,1,4,power_law_1.2,0.148307204246521
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,32,3072,1536,8,256,1,4,power_law_1.2,0.17935999631881713
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,48,3072,1536,8,256,1,4,power_law_1.2,0.20576000213623047
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,64,3072,1536,8,256,1,4,power_law_1.2,0.18248319625854492
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,80,3072,1536,8,256,1,4,power_law_1.2,0.18995200395584105
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,96,3072,1536,8,256,1,4,power_law_1.2,0.20157439708709718
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,128,3072,1536,8,256,1,4,power_law_1.2,0.21047039031982423
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,160,3072,1536,8,256,1,4,power_law_1.2,0.20437119007110596
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,192,3072,1536,8,256,1,4,power_law_1.2,0.21397759914398193
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,256,3072,1536,8,256,1,4,power_law_1.2,0.21338880062103271
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,320,3072,1536,8,256,1,4,power_law_1.2,0.22203519344329833
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,384,3072,1536,8,256,1,4,power_law_1.2,0.2251584053039551
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,512,3072,1536,8,256,1,4,power_law_1.2,0.23554561138153077
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,768,3072,1536,8,256,1,4,power_law_1.2,0.2512768030166626
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1024,3072,1536,8,256,1,4,power_law_1.2,0.264300799369812
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,1536,3072,1536,8,256,1,4,power_law_1.2,0.297491192817688
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,2048,3072,1536,8,256,1,4,power_law_1.2,0.330348801612854
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,3072,3072,1536,8,256,1,4,power_law_1.2,0.40954880714416503
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,4096,3072,1536,8,256,1,4,power_law_1.2,0.4988736152648926
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,6144,3072,1536,8,256,1,4,power_law_1.2,0.6393152236938476
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,8192,3072,1536,8,256,1,4,power_law_1.2,0.984556770324707
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,12288,3072,1536,8,256,1,4,power_law_1.2,1.2110783576965332
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,deepgemm,fp8_block,16384,3072,1536,8,256,1,4,power_law_1.2,1.69036808013916
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1,3072,1536,8,256,8,8,power_law_1.01,0.05490559935569763
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2,3072,1536,8,256,8,8,power_law_1.01,0.06101120114326477
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4,3072,1536,8,256,8,8,power_law_1.01,0.061536002159118655
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8,3072,1536,8,256,8,8,power_law_1.01,0.06421120166778564
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16,3072,1536,8,256,8,8,power_law_1.01,0.0723583996295929
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,32,3072,1536,8,256,8,8,power_law_1.01,0.0716480016708374
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,48,3072,1536,8,256,8,8,power_law_1.01,0.07361279726028443
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,64,3072,1536,8,256,8,8,power_law_1.01,0.0738048017024994
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,80,3072,1536,8,256,8,8,power_law_1.01,0.07317759990692138
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,96,3072,1536,8,256,8,8,power_law_1.01,0.07333760261535645
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,128,3072,1536,8,256,8,8,power_law_1.01,0.07260159850120544
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,160,3072,1536,8,256,8,8,power_law_1.01,0.07468159794807434
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,192,3072,1536,8,256,8,8,power_law_1.01,0.07652480006217957
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,256,3072,1536,8,256,8,8,power_law_1.01,0.0764415979385376
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,320,3072,1536,8,256,8,8,power_law_1.01,0.08312320113182067
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,384,3072,1536,8,256,8,8,power_law_1.01,0.08479359745979309
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,512,3072,1536,8,256,8,8,power_law_1.01,0.08914560079574585
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,768,3072,1536,8,256,8,8,power_law_1.01,0.09596800208091735
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1024,3072,1536,8,256,8,8,power_law_1.01,0.10607999563217163
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,1536,3072,1536,8,256,8,8,power_law_1.01,0.12428799867630005
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,2048,3072,1536,8,256,8,8,power_law_1.01,0.14232319593429565
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,3072,3072,1536,8,256,8,8,power_law_1.01,0.18008960485458375
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,4096,3072,1536,8,256,8,8,power_law_1.01,0.20505599975585936
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,6144,3072,1536,8,256,8,8,power_law_1.01,0.2853568077087402
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,8192,3072,1536,8,256,8,8,power_law_1.01,0.36773760318756105
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,12288,3072,1536,8,256,8,8,power_law_1.01,0.5218815803527832
TRTLLM,1.2.0rc6,NVIDIA GB200,moe,moe_torch_flow_cutlass,nvfp4,16384,3072,1536,8,256,8,8,power_law_1.01,0.6198080062866211
