framework,version,device,op_name,kernel_source,phase,batch_size,seq_len,num_tokens,d_model,d_state,d_conv,nheads,head_dim,n_groups,chunk_size,model_name,latency
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,1,1,1,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.025312000513076784
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,1,1,1,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.01696320027112961
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_update,generation,1,1,1,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.00514880008995533
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_update,generation,1,1,1,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.0047136001288890835
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,selective_state_update,generation,1,1,1,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.006003199890255928
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,selective_state_update,generation,1,1,1,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.006886400282382965
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_update,generation,2,1,2,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.005910399928689003
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_update,generation,2,1,2,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.004604800045490265
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,selective_state_update,generation,2,1,2,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.0065600000321865085
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,selective_state_update,generation,2,1,2,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.010700800269842149
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_update,generation,4,1,4,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.0058559998869895935
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_update,generation,4,1,4,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.0062431998550891874
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,selective_state_update,generation,4,1,4,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.010761599987745285
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,selective_state_update,generation,4,1,4,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.015254400670528412
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_update,generation,8,1,8,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.0066143997013568875
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_update,generation,8,1,8,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.006553599983453751
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,selective_state_update,generation,8,1,8,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.0151296004652977
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,selective_state_update,generation,8,1,8,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.02512960135936737
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_update,generation,16,1,16,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.007561600208282471
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_update,generation,16,1,16,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.00671359971165657
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,selective_state_update,generation,16,1,16,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.043609601259231565
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,selective_state_update,generation,16,1,16,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.024915200471878052
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_update,generation,32,1,32,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.007248000055551529
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_update,generation,32,1,32,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.008921600133180618
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,selective_state_update,generation,32,1,32,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.04376319944858551
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,selective_state_update,generation,32,1,32,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.08325440287590027
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_update,generation,64,1,64,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.008851200342178345
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_update,generation,64,1,64,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,selective_state_update,generation,64,1,64,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.08284159898757934
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,selective_state_update,generation,64,1,64,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.16287679672241212
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_update,generation,128,1,128,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.012992000579833985
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_update,generation,128,1,128,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.017078399658203125
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,selective_state_update,generation,128,1,128,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.1607583999633789
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,selective_state_update,generation,128,1,128,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.31667840480804443
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_update,generation,256,1,256,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.019340799748897554
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,selective_state_update,generation,256,1,256,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.3164063930511475
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_update,generation,256,1,256,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.027353599667549133
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,selective_state_update,generation,256,1,256,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.6268159866333007
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_update,generation,512,1,512,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.03162879943847656
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_update,generation,512,1,512,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.04928640127182007
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,selective_state_update,generation,512,1,512,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.6260447978973389
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,selective_state_update,generation,512,1,512,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,1.239452838897705
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_update,generation,1024,1,1024,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.058057600259780885
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_update,generation,1024,1,1024,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.09227520227432251
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,selective_state_update,generation,1024,1,1024,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,1.2444064140319824
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,selective_state_update,generation,1024,1,1024,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,2.472835159301758
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_update,generation,1,1,1,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.0049472000449895855
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,1,1,1,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.017212800681591034
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,selective_state_update,generation,1,1,1,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.005488000065088272
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_update,generation,2,1,2,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.006393600255250931
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,selective_state_update,generation,2,1,2,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.0069023996591567995
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_update,generation,4,1,4,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.00589120015501976
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,selective_state_update,generation,4,1,4,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.01067200005054474
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_update,generation,8,1,8,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.00671359971165657
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,selective_state_update,generation,8,1,8,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.015279999375343323
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_update,generation,16,1,16,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.007849600166082382
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,selective_state_update,generation,16,1,16,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.025183999538421632
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_update,generation,32,1,32,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.00695360004901886
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,selective_state_update,generation,32,1,32,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.043968001008033754
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_update,generation,64,1,64,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.009334400296211243
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,selective_state_update,generation,64,1,64,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.08297600150108338
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_update,generation,128,1,128,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.012886400520801543
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,selective_state_update,generation,128,1,128,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.1608672022819519
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_update,generation,256,1,256,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.019049599766731262
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,selective_state_update,generation,256,1,256,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.31623680591583253
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_update,generation,512,1,512,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.03151040077209473
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,selective_state_update,generation,512,1,512,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.6258111953735351
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_update,generation,1024,1,1024,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.05817599892616272
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,selective_state_update,generation,1024,1,1024,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,1.244700813293457
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,1,1,1,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.02497600018978119
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,1,1,1,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.018943999707698823
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,1,2,2,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.025513601303100587
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,1,2,2,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.01733119934797287
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,1,2,2,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.026156800985336303
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,1,2,2,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.021036800742149354
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,1,4,4,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.025302401185035704
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,1,4,4,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.026159998774528504
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,1,4,4,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.017561599612236023
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,1,4,4,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.021513600647449494
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,1,8,8,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.021107199788093566
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,1,8,8,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.025561600923538208
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,1,8,8,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,1,8,8,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.021139200031757354
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,1,16,16,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.02130880057811737
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,1,16,16,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.015107199549674988
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,1,16,16,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.021001599729061127
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,1,16,16,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.0252703994512558
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,1,32,32,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.015302400290966033
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,1,32,32,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.021206399798393248
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,1,32,32,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.021382400393486024
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,1,32,32,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.025519999861717223
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,1,1,1,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.019232000410556793
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,1,64,64,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.015276800096035003
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,1,64,64,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.02325119972229004
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,1,64,64,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.02114560008049011
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,1,64,64,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.027353599667549133
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,1,2,2,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.01727039963006973
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,1,128,128,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.015059199929237366
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,1,128,128,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.021113599836826324
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,1,128,128,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.027321600914001466
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,1,128,128,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.03107840120792389
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,1,256,256,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.015820799767971037
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,1,256,256,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.022140799462795256
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,1,256,256,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.04154239892959595
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,1,256,256,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.03135679960250855
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,1,512,512,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.02285439968109131
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,1,512,512,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.0169855996966362
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,1,512,512,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.059680002927780154
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,1,512,512,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.04601280093193054
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,1,1024,1024,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.017033599317073822
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,1,1024,1024,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.02423039972782135
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,1,1024,1024,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.06667199730873108
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,1,1024,1024,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.09473919868469238
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,1,2048,2048,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.025513601303100587
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,1,2048,2048,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.03702079951763153
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,1,2,2,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.02187200039625168
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,1,2048,2048,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.11108479499816895
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,1,2048,2048,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.17643519639968872
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,1,4,4,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.01759680062532425
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,1,4,4,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.02218240052461624
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,1,4096,4096,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.04207679927349091
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,1,4096,4096,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.06390399932861328
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,1,4096,4096,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.21794559955596923
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,1,4096,4096,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.33534719944000246
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,1,8,8,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.015251199901103973
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,1,8,8,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.02149759978055954
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,1,8192,8192,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.07921280264854431
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,1,8192,8192,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.12095680236816406
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,1,8192,8192,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.4185184001922607
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,1,8192,8192,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.6386720180511475
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,1,16,16,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.014985600113868713
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,1,16384,16384,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.1500383973121643
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,1,16384,16384,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.22945599555969237
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,1,16384,16384,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.8084832191467285
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,1,16384,16384,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,1.2488544464111329
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,1,32768,32768,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.29078719615936277
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,1,32768,32768,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.44534077644348147
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,1,32768,32768,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,1.5868543624877929
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,1,32768,32768,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,2.481760025024414
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,2,1,2,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.02958720028400421
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,2,1,2,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.04612480103969574
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,1,16,16,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.021027199923992157
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,1,32,32,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.01523520052433014
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,1,32,32,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.0211776003241539
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,1,64,64,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.01542080044746399
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,1,64,64,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.023545600473880768
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,1,128,128,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.01541759967803955
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,1,128,128,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.027551999688148497
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,1,256,256,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.016259199380874632
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,1,256,256,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.03751679956912994
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,2,1,2,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.02446399927139282
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,2,1,2,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.033302399516105655
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,1,512,512,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.01693760007619858
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,1,512,512,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.04357759952545166
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,2,2,4,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.029635199904441835
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,2,2,4,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.04577600061893463
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,1,1024,1024,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.01717119961977005
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,1,1024,1024,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.06591039896011353
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,1,2048,2048,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.025788798928260803
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,1,2048,2048,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.10555839538574219
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,1,4096,4096,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.04264959990978241
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,1,4096,4096,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.19495680332183837
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,1,8192,8192,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.07977920174598693
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,1,8192,8192,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.3665568113327026
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,1,16384,16384,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.15159039497375487
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,1,16384,16384,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.7018976211547852
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,1,1,1,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.008697599917650223
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,1,32768,32768,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.2918816089630127
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,1,32768,32768,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,1.3718976020812987
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,2,1,2,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.02985279858112335
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,2,2,4,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.023081600666046143
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,2,2,4,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.033302399516105655
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,2,4,8,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.02927039861679077
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,2,4,8,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.043977600336074826
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,2,4,8,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.023104000091552734
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,2,4,8,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.033395200967788696
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,2,8,16,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.03447360098361969
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,2,8,16,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.02343360036611557
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,2,8,16,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.0336544007062912
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,2,8,16,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.02298240065574646
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,2,16,32,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.03561280071735382
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,2,16,32,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.023366400599479677
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,2,1,2,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.026438400149345398
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,2,2,4,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.02975359857082367
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,2,16,32,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.02489279955625534
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,2,16,32,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.033327999711036685
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,2,32,64,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.024115200340747833
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,2,32,64,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.023654399812221526
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,2,32,64,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.03553600013256073
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,2,32,64,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.03373439908027649
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,2,64,128,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.024428799748420715
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,2,64,128,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.027532801032066345
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,2,2,4,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.02503040134906769
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,2,64,128,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.036099201440811156
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,2,64,128,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.037676799297332766
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,2,128,256,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.025206398963928223
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,2,128,256,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.03169600069522858
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,2,4,8,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.029504001140594482
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,2,4,8,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.025388801097869874
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,2,128,256,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.03653759956359863
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,2,128,256,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.043961599469184875
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,2,256,512,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.025135999917984007
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,2,256,512,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.04574080109596253
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,2,8,16,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.02344000041484833
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,2,8,16,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.025545600056648254
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,2,256,512,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.036713600158691406
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,2,256,512,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.06090239882469177
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,2,512,1024,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.025209599733352663
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,2,512,1024,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.0662015974521637
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,2,16,32,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.023574399948120116
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,2,512,1024,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.03802880048751831
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,2,512,1024,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.0971455991268158
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,2,1024,2048,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.02685759961605072
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,2,1024,2048,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.1115488052368164
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,2,1024,2048,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.040089601278305055
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,2,1024,2048,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.18235839605331422
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,2,2048,4096,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.04223679900169373
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,2,2048,4096,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.20961918830871581
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,2,2048,4096,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.0661184012889862
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,2,2048,4096,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.34232640266418457
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,2,4096,8192,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.07592319846153259
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,2,4096,8192,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.40307202339172366
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,2,4096,8192,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.11906239986419678
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,2,4096,8192,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.6524223804473877
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,2,8192,16384,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.13957120180130006
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,2,8192,16384,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.7777344226837158
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,2,8192,16384,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.22157759666442872
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,2,16384,32768,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.2682176113128662
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,2,8192,16384,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,1.2812607765197754
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,2,16,32,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.025833600759506227
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,2,16384,32768,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,1.5289664268493652
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,2,16384,32768,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.42776641845703123
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,2,32,64,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.02494720071554184
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,2,32,64,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.025481599569320678
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,2,32768,65536,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.5246655941009521
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,2,16384,32768,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,2.539654350280762
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,2,32768,65536,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,3.124729537963867
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,2,64,128,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.02471359968185425
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,2,64,128,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.02778240144252777
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,4,1,4,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.0522271990776062
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,2,32768,65536,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.8370495796203613
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,4,1,4,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.029679998755455017
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,2,32768,65536,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,5.140639877319336
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,2,128,256,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.025388801097869874
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,2,128,256,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.03366400003433227
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,4,2,8,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.05262399911880493
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,4,2,8,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.031430399417877196
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,4,1,4,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.08493120074272156
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,4,1,4,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.047977599501609805
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,2,256,512,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.02504960000514984
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,2,256,512,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.04193280041217804
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,4,4,16,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.052537602186203
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,4,4,16,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.031455999612808226
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,4,2,8,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.08485760092735291
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,4,2,8,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.0495743989944458
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,2,512,1024,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.025385600328445435
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,2,512,1024,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.06534399986267089
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,4,8,32,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.04005439877510071
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,4,8,32,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.031324800848960874
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,4,4,16,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.08314560055732727
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,4,4,16,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.04926080107688904
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,2,1024,2048,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.02699199914932251
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,2,1024,2048,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.10763839483261109
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,4,16,64,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.04017600119113922
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,4,16,64,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.03157120048999786
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,4,8,32,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.06351040005683899
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,2,2048,4096,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.04248639941215515
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,4,8,32,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.048147198557853696
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,2,2048,4096,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.19304319620132446
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,4,32,128,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.041529598832130435
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,4,32,128,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.03156799972057343
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,4,16,64,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.0641215980052948
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,4,16,64,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.04803839921951294
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,2,4096,8192,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.07646719813346863
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,2,4096,8192,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.3606911897659302
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,4,64,256,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.041500800848007204
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,4,64,256,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.035920000076293944
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,4,32,128,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.06507840156555175
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,4,32,128,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.048979198932647704
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,2,8192,16384,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.14082239866256713
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,2,8192,16384,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.6917280197143555
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,4,128,512,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.04207679927349091
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,4,128,512,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.04599039852619171
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,4,64,256,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.06572160124778748
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,4,64,256,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.055103999376296994
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,2,16384,32768,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.26851520538330076
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,2,16384,32768,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,1.3550592422485352
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,4,256,1024,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.04295360147953033
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,4,256,1024,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.06835200190544129
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,4,128,512,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.0662335991859436
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,4,128,512,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.06627519726753235
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,2,32768,65536,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.5259200096130371
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,4,512,2048,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.044512000679969785
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,2,32768,65536,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,2.7322528839111326
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,4,512,2048,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.11170560121536255
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,4,256,1024,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.06717439889907836
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,4,256,1024,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.10073280334472656
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,4,1,4,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.05403519868850708
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,4,1024,4096,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.047040000557899475
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,4,1,4,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.035462400317192076
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,4,1024,4096,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.21029119491577147
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,4,512,2048,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.07010560035705567
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,4,512,2048,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.18290879726409912
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,4,2,8,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.054281598329544066
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,4,2,8,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.03749440014362335
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,4,2048,8192,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.07919039726257324
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,4,2048,8192,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.40488638877868655
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,4,1024,4096,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.07514879703521729
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,4,1024,4096,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.3409663915634155
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,4,4,16,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.05237759947776795
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,4,4,16,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.037334400415420535
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,4,4096,16384,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.1403424024581909
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,4,4096,16384,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.7809311866760253
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,4,2048,8192,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.1268895983695984
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,4,2048,8192,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.6498271942138671
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,4,8,32,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.04016000032424927
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,4,8,32,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.035913598537445066
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,4,8192,32768,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.2609087944030762
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,4,8192,32768,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,1.5302207946777344
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,4,4096,16384,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.22459518909454346
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,4,16,64,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.0407584011554718
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,4,16,64,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.03590719997882843
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,4,4096,16384,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,1.2653632164001465
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,4,16384,65536,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.5044127941131592
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,4,16384,65536,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,3.136662483215332
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,4,32,128,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.0416159987449646
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,4,32,128,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.03749440014362335
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,4,8192,32768,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.42335357666015627
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,4,8192,32768,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,2.5048608779907227
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,4,64,256,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.04200319945812225
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,4,32768,131072,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.986518383026123
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,4,64,256,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.04177600145339966
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,4,16384,65536,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.8216671943664551
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,4,32768,131072,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,6.280121612548828
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,4,16384,65536,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,5.089395141601562
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,4,128,512,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.0424703985452652
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,4,128,512,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.050220799446105954
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,8,1,8,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.09975680112838745
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,8,1,8,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.04357120096683502
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,4,256,1024,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.04323840141296387
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,4,256,1024,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.06800000071525573
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,4,32768,131072,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,1.6166847229003907
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,8,2,16,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.10128639936447144
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,8,2,16,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.04177600145339966
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,4,512,2048,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.04493759870529175
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,4,32768,131072,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,10.557366180419923
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,4,512,2048,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.10920640230178832
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,8,4,32,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.0994592010974884
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,8,4,32,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.0437855988740921
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,4,1024,4096,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.0474592000246048
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,8,1,8,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.1639583945274353
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,4,1024,4096,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.19271039962768555
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,8,1,8,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.07982400059700012
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,8,8,64,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.07466560006141662
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,8,8,64,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.04367679953575134
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,4,2048,8192,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.07891520261764526
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,4,2048,8192,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.3622080087661743
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,8,2,16,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.16313600540161133
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,8,2,16,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.07851200103759766
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,8,16,128,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.07468479871749878
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,8,16,128,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.042243200540542605
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,4,4096,16384,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.14071040153503417
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,4,4096,16384,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.6922688007354736
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,8,4,32,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.16101759672164917
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,8,4,32,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.08049600124359131
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,8,32,256,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.07587199807167053
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,8,32,256,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.04384320080280304
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,4,8192,32768,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.2609760046005249
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,8,8,64,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.11969280242919922
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,8,8,64,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.07976959943771363
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,4,8192,32768,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,1.3542624473571778
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,1,1,1,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.020931200683116914
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,8,64,512,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.07750719785690308
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,8,64,512,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.052662402391433716
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,8,16,128,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.12058240175247192
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,8,16,128,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.08004800081253052
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,4,16384,65536,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.5038112163543701
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,4,16384,65536,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,2.763859176635742
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,1,2,2,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.0088639996945858
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,8,128,1024,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.0780672013759613
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,8,128,1024,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.06977919936180114
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,8,32,256,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.12216639518737793
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,8,32,256,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.08074560165405273
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,4,32768,131072,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.9936960220336915
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,8,256,2048,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.07803519964218139
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,8,256,2048,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.1115488052368164
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,4,32768,131072,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,5.478249740600586
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,8,64,512,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.1256384015083313
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,8,64,512,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.0955839991569519
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,8,512,4096,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.08109120130538941
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,8,512,4096,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.2114272117614746
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,8,1,8,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.10131200551986694
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,8,1,8,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.05468479990959167
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,8,128,1024,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.12517759799957276
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,8,128,1024,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.11039999723434449
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,1,2,2,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.023043200373649597
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,8,1024,8192,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.09019200205802917
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,8,1024,8192,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.40610241889953613
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,8,2,16,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.1011199951171875
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,8,2,16,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.056518399715423585
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,8,256,2048,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.12601280212402344
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,8,256,2048,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.18748799562454224
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,1,4,4,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.009027200192213059
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,1,4,4,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.023311999440193177
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,8,2048,16384,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.14908479452133178
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,8,4,32,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.09929280281066895
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,8,2048,16384,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.7821311950683594
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,8,4,32,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.05809280276298523
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,8,512,4096,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.13261760473251344
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,8,512,4096,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.3434112071990967
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,1,8,8,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.007452800124883652
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,1,8,8,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.02141759991645813
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,8,8,64,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.07478399872779846
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,8,4096,32768,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.26571838855743407
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,8,8,64,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.05809919834136963
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,8,4096,32768,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,1.5441760063171386
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,8,1024,8192,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.14648319482803346
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,8,1024,8192,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.6520800113677978
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,1,16,16,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.00719040036201477
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,8,16,128,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.07448639869689941
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,8,16,128,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.05801600217819214
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,8,8192,65536,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.5008543968200684
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,8,2048,16384,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.24208641052246094
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,8,2048,16384,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,1.2659968376159667
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,8,8192,65536,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,3.133462333679199
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,8,32,256,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.07640640139579773
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,8,32,256,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.0584384024143219
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,8,4096,32768,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.4346752166748047
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,8,16384,131072,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.9730463981628418
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,8,4096,32768,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,2.5095455169677736
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,8,64,512,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.07742720246315002
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,8,64,512,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.0694208025932312
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,8,16384,131072,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,6.307574462890625
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,8,8192,65536,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.8267711639404297
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,8,128,1024,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.07776960134506225
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,8,128,1024,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.08746240139007569
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,1,16,16,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.02136960029602051
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,8,8192,65536,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,5.0923809051513675
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,8,32768,262144,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,1.9184320449829102
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,8,256,2048,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.07870720028877258
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,8,256,2048,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.11239360570907593
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,1,32,32,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.008787199854850769
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,1,32,32,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.022201600670814513
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,8,32768,262144,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,12.569996643066407
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,8,16384,131072,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,1.6097984313964844
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,8,512,4096,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.08196480274200439
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,1,64,64,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.008611200004816055
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,8,512,4096,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.1951807975769043
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,1,64,64,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.02173759937286377
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,8,16384,131072,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,10.560313415527343
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,16,1,16,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.20007679462432862
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,1,128,128,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.008912000060081481
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,8,1024,8192,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.08928959965705871
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,1,128,128,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.025279998779296875
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,8,1024,8192,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.3639456033706665
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,1,256,256,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.008975999802350998
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,8,2048,16384,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.1493631958961487
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,16,1,16,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.32998080253601075
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,8,2048,16384,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.6915008068084717
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,1,256,256,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.027164798974990845
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,8,4096,32768,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.2665760040283203
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,1,512,512,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.008703999966382981
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,1,512,512,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.02714560031890869
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,8,4096,32768,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,1.3505727767944335
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,1,1024,1024,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.0088128000497818
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,1,1024,1024,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.02943359911441803
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,8,8192,65536,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.5041471958160401
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,8,8192,65536,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,2.7207231521606445
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,1,2048,2048,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.011926399916410447
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,16,1,16,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.06809599995613098
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,1,2048,2048,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.04365760087966919
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,8,16384,131072,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.9799776077270508
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,8,16384,131072,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,5.438351821899414
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,16,2,32,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.19396480321884155
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,1,4096,4096,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.01817599982023239
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,1,4096,4096,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.07665600180625916
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,16,1,16,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.1394528031349182
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,8,32768,262144,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,1.9274784088134767
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,1,8192,8192,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.031139200925827025
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,1,8192,8192,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.12979520559310914
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,8,32768,262144,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,10.860678100585938
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,16,2,32,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.3189152002334595
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,1,16384,16384,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.05579839944839478
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,1,16384,16384,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.25863358974456785
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,16,1,16,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.20131199359893798
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,1,32768,32768,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.1099552035331726
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,1,32768,32768,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.5378047943115234
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,2,1,2,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.011404799669981003
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,16,2,32,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.06993600130081176
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,16,4,64,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.1919487953186035
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,16,4,64,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.07031040191650391
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,16,8,128,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.1421056032180786
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,16,8,128,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.07023040056228638
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,16,2,32,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.13968640565872192
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,16,16,256,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.14374079704284667
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,2,1,2,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.02295999974012375
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,16,1,16,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.09051520228385926
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,16,4,64,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.3155936002731323
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,16,4,64,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.14221760034561157
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,2,2,4,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.011452800035476685
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,16,2,32,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.1936959981918335
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,16,8,128,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.23240320682525634
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,16,8,128,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.14338239431381225
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,16,16,256,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.23431680202484131
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,16,16,256,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.07037119865417481
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,2,2,4,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.02098879963159561
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,16,32,512,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.14614720344543458
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,16,32,512,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.07187839746475219
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,2,4,8,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.01130239963531494
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,2,4,8,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.021353599429130555
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,16,2,32,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.09889280200004577
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,16,64,1024,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.14752320051193238
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,16,64,1024,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.0879584014415741
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,2,8,16,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.009401600062847137
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,2,8,16,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.023343999683856965
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,16,4,64,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.19140160083770752
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,16,4,64,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.09932159781455993
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,16,128,2048,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.14767999649047853
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,16,128,2048,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.11539839506149292
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,16,16,256,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.14184319972991943
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,2,16,32,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.010889600217342376
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,16,8,128,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.14210879802703857
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,16,8,128,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.0995519995689392
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,16,256,4096,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.15009280443191528
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,16,32,512,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.23822400569915772
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,16,256,4096,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.21552000045776368
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,16,32,512,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.14423999786376954
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,16,16,256,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.1434015989303589
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,16,512,8192,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.15944960117340087
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,16,64,1024,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.2418463945388794
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,16,64,1024,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.16898880004882813
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,16,512,8192,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.4067039966583252
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,16,128,2048,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.24316160678863524
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,16,1024,16384,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.17229119539260865
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,16,128,2048,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.19555519819259642
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,16,1024,16384,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.7817823886871338
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,2,16,32,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.023027199506759643
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,16,256,4096,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.24645440578460692
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,16,2048,32768,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.2871328115463257
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,16,256,4096,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.34809279441833496
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,16,2048,32768,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,1.531385612487793
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,2,32,64,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.01103999987244606
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,2,32,64,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.023177599906921385
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,16,512,8192,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.2617343902587891
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,16,512,8192,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.6569471836090088
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,2,64,128,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.010931199789047242
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,2,64,128,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.02319999933242798
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,16,4096,65536,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.5166368007659912
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,16,4096,65536,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,3.12542724609375
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,16,16,256,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.10042239427566528
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,2,128,256,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.011151999980211259
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,2,128,256,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.02728320062160492
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,16,1024,16384,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.2824959993362427
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,16,1024,16384,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,1.2741727828979492
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,16,32,512,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.14583359956741332
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,16,32,512,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.10105279684066773
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,16,8192,131072,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.9826399803161621
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,16,8192,131072,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,6.276764678955078
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,2,256,512,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.010908800363540649
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,2,256,512,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.027296000719070436
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,16,2048,32768,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.47273921966552734
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,16,2048,32768,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,2.511363220214844
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,16,64,1024,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.1478111982345581
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,16,64,1024,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.12046079635620117
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,2,512,1024,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.010921599715948105
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,2,512,1024,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.027100801467895508
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,16,16384,262144,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,1.914169692993164
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,16,128,2048,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.14842560291290283
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,16,128,2048,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.15467519760131837
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,16,4096,65536,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.8572863578796387
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,2,1024,2048,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.010889600217342376
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,2,1024,2048,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.04365119934082031
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,16,4096,65536,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,5.09197769165039
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,16,16384,262144,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,12.545526123046875
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,16,256,4096,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.1505887985229492
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,16,256,4096,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.2057055950164795
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,2,2048,4096,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.017097599804401398
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,2,2048,4096,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.0726751983165741
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,16,8192,131072,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,1.632476806640625
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,16,512,8192,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.16009919643402098
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,16,512,8192,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.3673759937286377
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,2,4096,8192,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.026675200462341307
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,32,1,32,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.3907520055770874
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,2,4096,8192,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.119542396068573
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,16,8192,131072,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,10.633625793457032
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,32,1,32,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.1275488018989563
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,16,1024,16384,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.1739743947982788
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,16,1024,16384,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.6927167892456054
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,2,8192,16384,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.046214398741722104
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,2,8192,16384,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.2386080026626587
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,32,2,64,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.38517439365386963
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,32,2,64,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.12794239521026612
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,16,2048,32768,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.28715200424194337
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,2,16384,32768,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.0900704026222229
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,2,16384,32768,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.47713279724121094
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,16,2048,32768,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,1.3540063858032227
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,32,4,128,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.3762880086898804
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,32,4,128,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.12528959512710572
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,32,1,32,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.6490303993225097
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,32,1,32,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.2597440004348755
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,2,32768,65536,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.1734336018562317
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,16,4096,65536,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.5187232017517089
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,2,32768,65536,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.9383008003234863
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,32,8,256,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.2781951904296875
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,32,8,256,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.12776639461517333
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,16,4096,65536,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,2.7487295150756834
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,32,2,64,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.6394464015960694
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,32,2,64,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.26146240234375
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,4,1,4,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.017417599260807038
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,4,1,4,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.023289600014686586
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,32,16,512,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.279964804649353
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,32,16,512,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.1282655954360962
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,16,8192,131072,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.9879615783691407
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,32,4,128,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.6245567798614502
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,32,4,128,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.26282880306243894
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,16,8192,131072,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,5.441206359863282
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,4,2,8,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.017312000691890716
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,4,2,8,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.023481599986553192
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,32,32,1024,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.285315203666687
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,32,32,1024,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.13179839849472047
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,32,8,256,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.4593952178955078
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,32,8,256,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.2634495973587036
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,4,4,16,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.01737920045852661
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,4,4,16,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.02351360023021698
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,16,16384,262144,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,1.9251968383789062
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,32,64,2048,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.28752319812774657
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,32,64,2048,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.1601215958595276
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,32,16,512,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.4631392002105713
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,32,16,512,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.263590407371521
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,4,8,32,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.014988799393177033
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,4,8,32,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.023343999683856965
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,16,16384,262144,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,10.857071685791016
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,32,128,4096,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.29081599712371825
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,32,128,4096,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.2199104070663452
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,32,32,1024,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.4698624134063721
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,32,32,1024,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.2667232036590576
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,4,16,64,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,4,16,64,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.02319999933242798
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,32,256,8192,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.296777606010437
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,32,256,8192,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.4110208034515381
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,32,1,32,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.39267520904541015
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,32,1,32,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.16670080423355102
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,32,64,2048,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.4751904010772705
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,4,32,128,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.015043200552463531
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,32,64,2048,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.31541121006011963
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,4,32,128,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.023263999819755556
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,32,512,16384,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.3107583999633789
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,32,512,16384,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.7837440013885498
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,32,2,64,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.38719360828399657
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,4,64,256,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.014841599762439728
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,32,2,64,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.1854431986808777
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,32,128,4096,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.48175678253173826
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,4,64,256,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.02327679991722107
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,32,128,4096,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.3655424118041992
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,32,1024,32768,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.3353503942489624
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,32,1024,32768,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,1.5352543830871581
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,4,128,512,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.015084800124168397
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,4,128,512,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.027382400631904603
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,32,4,128,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.3772768020629883
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,32,4,128,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.18407679796218873
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,32,256,8192,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.49021120071411134
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,32,256,8192,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.6695199966430664
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,32,2048,65536,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.5633024215698242
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,4,256,1024,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.015337599813938141
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,4,256,1024,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.02932800054550171
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,32,8,256,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.2779936075210571
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,32,2048,65536,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,3.130054473876953
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,32,8,256,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.18494399785995483
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,32,512,16384,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.5132863998413086
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,32,512,16384,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,1.2858976364135741
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,4,512,2048,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.01648640036582947
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,4,512,2048,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.04176000058650971
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,32,16,512,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.28089919090271
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,32,16,512,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.1866976022720337
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,32,4096,131072,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,1.0199040412902831
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,32,1024,32768,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.5543615818023682
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,32,1024,32768,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,2.5255008697509767
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,32,4096,131072,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,6.274272155761719
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,4,1024,4096,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.017001600563526155
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,4,1024,4096,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.07066559791564941
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,32,32,1024,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.2852031946182251
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,32,32,1024,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.18906559944152831
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,32,2048,65536,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.9350144386291503
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,4,2048,8192,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.025622400641441345
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,4,2048,8192,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.11753280162811279
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,32,64,2048,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.28822400569915774
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,32,2048,65536,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,5.104169464111328
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,32,8192,262144,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,1.9452447891235352
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,32,64,2048,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.22445120811462402
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,4,4096,16384,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.04193919897079468
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,4,4096,16384,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.23230080604553222
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,32,8192,262144,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,12.546844482421875
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,32,128,4096,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.2907072067260742
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,32,128,4096,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.2914144039154053
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,32,4096,131072,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,1.7012447357177733
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,4,8192,32768,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.07813439965248108
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,4,8192,32768,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.44495677947998047
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,32,256,8192,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.29649920463562013
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,32,256,8192,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.37792000770568845
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,32,4096,131072,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,10.555235290527344
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,4,16384,65536,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.1491328001022339
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,64,1,64,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.7638432025909424
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,32,512,16384,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.31141440868377684
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,32,512,16384,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.6992703914642334
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,4,16384,65536,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.8719231605529785
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,64,1,64,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.2405344009399414
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,32,1024,32768,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.3350656032562256
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,4,32768,131072,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.28901760578155516
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,64,2,128,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.7619935989379882
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,32,1024,32768,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,1.357107162475586
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,64,2,128,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.24087040424346923
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,4,32768,131072,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,1.7246623992919923
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,64,1,64,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,1.2723008155822755
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,64,1,64,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.4959616184234619
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,8,1,8,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.02934719920158386
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,64,4,256,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.7490560054779053
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,32,2048,65536,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.5637695789337158
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,8,1,8,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.025190401077270507
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,64,4,256,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.2426464080810547
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,32,2048,65536,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,2.730188751220703
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,64,2,128,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,1.268284797668457
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,64,2,128,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.4988895893096924
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,8,2,16,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.029500800371170043
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,8,2,16,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.023747199773788454
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,64,8,512,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.5483967781066894
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,64,8,512,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.24291520118713378
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,32,4096,131072,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,1.0262975692749023
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,32,4096,131072,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,5.446736145019531
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,64,4,256,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,1.2473312377929688
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,64,4,256,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.4994336128234863
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,8,4,32,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.029091200232505797
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,8,4,32,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.025040000677108765
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,64,16,1024,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.5526591777801514
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,64,16,1024,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.24544320106506348
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,8,8,64,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.02322240024805069
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,8,8,64,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.024345600605010988
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,64,8,512,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.911616039276123
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,64,8,512,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.5006688117980957
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,64,32,2048,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.562665605545044
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,64,32,2048,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.24786241054534913
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,32,8192,262144,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,1.9561216354370117
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,8,16,128,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.023177599906921385
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,8,16,128,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.023500800132751465
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,32,8192,262144,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,10.860870361328125
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,64,16,1024,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.9195008277893066
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,64,16,1024,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.5052864074707031
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,64,64,4096,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.5704383850097656
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,64,64,4096,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.30450561046600344
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,8,32,256,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.024233600497245787
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,8,32,256,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.025206398963928223
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,64,32,2048,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.9324928283691406
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,64,32,2048,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.5140575885772705
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,64,128,8192,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.578985595703125
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,64,128,8192,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.4184607982635498
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,8,64,512,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.023657600581645965
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,8,64,512,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.02550399899482727
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,64,1,64,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.7670112133026123
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,64,64,4096,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.9480959892272949
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,64,64,4096,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.6068064212799072
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,64,256,16384,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.5844639778137207
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,64,1,64,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.3134176015853882
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,64,256,16384,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.7934304237365722
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,8,128,1024,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.02459840029478073
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,8,128,1024,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.029462400078773498
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,64,128,8192,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.9611136436462402
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,64,128,8192,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.7040512084960937
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,64,2,128,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.7653567790985107
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,64,512,32768,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.6128064155578613
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,64,2,128,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.34767038822174073
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,64,512,32768,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,1.5438176155090333
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,8,256,2048,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.024508799612522125
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,8,256,2048,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.04184640049934387
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,64,256,16384,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,0.9682751655578613
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,64,4,256,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.7513152122497558
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,64,4,256,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.34927680492401125
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,64,256,16384,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,1.3100735664367675
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,64,1024,65536,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,0.6576767921447754
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,8,512,4096,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.025276800990104674
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,8,512,4096,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.07049919962882996
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,64,1024,65536,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,3.1378496170043944
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,64,8,512,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.54967041015625
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,64,8,512,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.3490015983581543
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,64,512,32768,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,1.0181183815002441
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,8,1024,8192,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.0263264000415802
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,8,1024,8192,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.11673599481582642
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,64,512,32768,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,2.549148750305176
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,64,2048,131072,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,1.1144864082336425
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,64,2048,131072,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,6.283545684814453
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,64,16,1024,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.5541728019714356
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,64,16,1024,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.35157439708709715
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,8,2048,16384,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.0417248010635376
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,8,2048,16384,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.2280128002166748
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,64,1024,65536,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,1.0953120231628417
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,64,1024,65536,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,5.130073547363281
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,64,32,2048,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.5624351978302002
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,64,32,2048,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.35438721179962157
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,8,4096,32768,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.07465599775314331
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,8,4096,32768,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.44413437843322756
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,64,4096,262144,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,2.0286144256591796
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,64,64,4096,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.5695072174072265
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,64,2048,131072,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,1.8593984603881837
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,64,64,4096,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.42490558624267577
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,8,8192,65536,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.13871999979019164
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,64,4096,262144,2688,128,4,64,64,8,128,NEMOTRON_H_3_Nano,12.542249298095703
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,8,8192,65536,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.8710335731506348
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,64,2048,131072,4096,128,4,128,64,8,128,NEMOTRON_H_3_Super,10.581929779052734
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,64,128,8192,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.5787456035614014
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,64,128,8192,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.5569248199462891
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,8,16384,131072,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.2678911924362183
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,8,16384,131072,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,1.7172704696655274
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,64,256,16384,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.5838399887084961
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,64,256,16384,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.7178336143493652
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,8,32768,262144,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.5184639930725098
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,8,32768,262144,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,3.4071071624755858
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,64,512,32768,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.6141632080078125
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,64,512,32768,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,1.3711071968078614
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,16,1,16,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.054092800617218016
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,64,1024,65536,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,0.659830379486084
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,64,1024,65536,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,2.7397216796875
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,64,2048,131072,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,1.1206048011779786
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,64,2048,131072,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,5.456707382202149
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,16,1,16,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.035369598865509035
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,64,4096,262144,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,2.039619255065918
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,16,2,32,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.05195519924163818
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,64,4096,262144,8192,128,4,64,64,8,256,MAMBA2_GENERIC_4K,10.87014389038086
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,16,2,32,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.03617919981479645
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,16,4,64,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.05200639963150024
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,16,4,64,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.03685120046138764
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,16,8,128,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.039654400944709775
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,16,8,128,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.03581759929656982
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,16,16,256,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.03967039883136749
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,16,16,256,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.03531199991703034
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,16,32,512,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.04095039963722229
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,16,32,512,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.03570240139961243
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,16,64,1024,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.040940800309181215
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,16,64,1024,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.03617280125617981
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,16,128,2048,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.041407999396324155
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,16,128,2048,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.04370880126953125
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,16,256,4096,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.042444801330566405
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,16,256,4096,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.07275840044021606
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,16,512,8192,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.04395520091056824
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,16,512,8192,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.1180575966835022
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,16,1024,16384,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.04692479968070984
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,16,1024,16384,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.22777919769287108
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,16,2048,32768,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.07826240062713623
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,16,2048,32768,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.44460158348083495
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,16,4096,65536,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.1385696053504944
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,16,4096,65536,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.8702783584594727
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,16,8192,131072,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.26020159721374514
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,16,8192,131072,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,1.7278175354003906
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,16,16384,262144,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.5019680023193359
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,16,16384,262144,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,3.4094303131103514
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,16,32768,524288,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.9751104354858399
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,16,32768,524288,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,6.776927947998047
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,32,1,32,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.1033471941947937
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,32,1,32,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.060166400671005246
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,32,2,64,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.10136640071868896
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,32,2,64,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.06020479798316956
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,32,4,128,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.09833920001983643
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,32,4,128,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.060249602794647215
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,32,8,256,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.07306879758834839
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,32,8,256,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.06022080183029175
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,32,16,512,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.07448959946632386
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,32,16,512,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.0618399977684021
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,32,32,1024,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.07578240036964416
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,32,32,1024,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.06076480150222778
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,32,64,2048,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.07698559761047363
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,32,64,2048,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.06241599917411804
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,32,128,4096,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.07686399817466735
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,32,128,4096,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.076528000831604
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,32,256,8192,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.07755839824676514
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,32,256,8192,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.11994880437850952
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,32,512,16384,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.08121280074119568
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,32,512,16384,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.22890560626983641
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,32,1024,32768,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.08872320055961609
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,32,1024,32768,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.4441567897796631
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,32,2048,65536,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.1478559970855713
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,32,2048,65536,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.8654272079467773
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,32,4096,131072,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.26528639793395997
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,32,4096,131072,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,1.7029504776000977
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,32,8192,262144,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.5020224094390869
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,32,8192,262144,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,3.3844512939453124
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,32,16384,524288,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.9733375549316406
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,32,16384,524288,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,6.723468780517578
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,32,32768,1048576,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,1.9196447372436523
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,32,32768,1048576,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,13.41661376953125
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,64,1,64,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.1955839991569519
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,64,1,64,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.09892160296440125
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,64,2,128,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.19514559507369994
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,64,2,128,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.10101439952850341
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,64,4,256,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.19233280420303345
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,64,4,256,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.1013759970664978
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,64,8,512,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.14197440147399903
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,64,8,512,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.10110399723052979
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,64,16,1024,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.14266560077667237
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,64,16,1024,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.09991040229797363
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,64,32,2048,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.1444704055786133
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,64,32,2048,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.10111039876937866
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,64,64,4096,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.14721599817276002
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,64,64,4096,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.10531200170516967
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,64,128,8192,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.14782079458236694
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,64,128,8192,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.12964799404144287
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_update,generation,1,1,1,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.004588799923658371
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,64,256,16384,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.14888960123062134
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,64,256,16384,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.2345184087753296
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,64,512,32768,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.15831680297851564
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,64,512,32768,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.4477407932281494
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,selective_state_update,generation,1,1,1,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.0047231998294591905
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,64,1024,65536,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.1710271954536438
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,64,1024,65536,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.8672575950622559
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_update,generation,2,1,2,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.005817599967122078
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,64,2048,131072,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.28603520393371584
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,64,2048,131072,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,1.7203712463378906
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,selective_state_update,generation,2,1,2,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.004771199822425842
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_update,generation,4,1,4,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.0058432001620531086
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,selective_state_update,generation,4,1,4,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.005084799975156784
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,64,4096,262144,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.517903995513916
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,64,4096,262144,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,3.381033706665039
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_update,generation,8,1,8,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.00594559982419014
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,selective_state_update,generation,8,1,8,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.006489600241184235
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,64,8192,524288,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.9857024192810059
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_update,generation,16,1,16,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.006719999760389328
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,64,8192,524288,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,6.724102020263672
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,selective_state_update,generation,16,1,16,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.0069983996450901035
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_update,generation,32,1,32,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.006716799736022949
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,selective_state_update,generation,32,1,32,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.010937599837779999
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_fn,context,64,16384,1048576,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,1.9200639724731445
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_update,generation,64,1,64,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.00684799998998642
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,selective_state_update,generation,64,1,64,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.015219199657440185
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,mamba_chunk_scan_combined,context,64,16384,1048576,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,13.403768920898438
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_update,generation,128,1,128,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.007174400240182876
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,selective_state_update,generation,128,1,128,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.025417599081993102
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_update,generation,256,1,256,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.00883840024471283
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,selective_state_update,generation,256,1,256,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.04396480023860931
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_update,generation,512,1,512,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.012956799566745758
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,selective_state_update,generation,512,1,512,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.0831488013267517
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,causal_conv1d_update,generation,1024,1,1024,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.01923519968986511
TRTLLM,1.2.0rc5,NVIDIA GB200,mamba2,selective_state_update,generation,1024,1,1024,1024,64,4,16,64,4,128,MAMBA2_GENERIC_1K,0.16286400556564332