-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  --------------------------------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg       CPU Mem  Self CPU Mem      CUDA Mem  Self CUDA Mem    # of Calls                            Input Shapes  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  --------------------------------------  
                                          ProfilerStep*         0.00%       0.000us         0.00%       0.000us       0.000us       4.369ms      1810.08%       4.369ms       1.456ms           0 B           0 B           0 B           0 B             3                                      []  
                                          ProfilerStep*        49.02%       2.687ms        99.93%       5.478ms       1.826ms       0.000us         0.00%     126.908us      42.303us           0 B         -72 B           0 B      -4.31 MB             3                                      []  
torchfx::sequential_biquad_kernel(double const*, dou...         0.00%       0.000us         0.00%       0.000us       0.000us     109.728us        45.46%     109.728us      36.576us           0 B           0 B           0 B           0 B             3                                      []  
void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us      34.269us        14.20%      34.269us       1.428us           0 B           0 B           0 B           0 B            24                                      []  
void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us      31.137us        12.90%      31.137us       1.483us           0 B           0 B           0 B           0 B            21                                      []  
                                             aten::add_         2.72%     149.231us         4.59%     251.518us      13.973us      26.144us        10.83%      26.144us       1.452us           0 B           0 B           0 B           0 B            18                      [[512], [512], []]  
                                              aten::mul         3.92%     215.039us         6.44%     353.119us      19.618us      25.438us        10.54%      25.438us       1.413us           0 B           0 B      36.00 KB      36.00 KB            18                             [[512], []]  
                                            aten::copy_         1.63%      89.111us         3.12%     171.133us      19.015us      23.903us         9.90%      23.903us       2.656us           0 B           0 B           0 B           0 B             9                [[2, 512], [2, 512], []]  
void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us      19.645us         8.14%      19.645us       2.183us           0 B           0 B           0 B           0 B             9                                      []  
                                               aten::to         0.25%      13.577us         3.73%     204.488us      34.081us       0.000us         0.00%      18.239us       3.040us           0 B           0 B      36.00 KB           0 B             6              [[2, 512], [], [], [], []]  
                                         aten::_to_copy         0.71%      39.109us         3.48%     190.911us      31.819us       0.000us         0.00%      18.239us       3.040us           0 B           0 B      36.00 KB           0 B             6      [[2, 512], [], [], [], [], [], []]  
                                         aten::_to_copy         1.19%      65.261us         6.47%     354.482us      39.387us       0.000us         0.00%      13.887us       1.543us          72 B           0 B       3.00 KB           0 B             9           [[3], [], [], [], [], [], []]  
                                            aten::copy_         1.57%      86.032us         3.96%     217.239us      24.138us      13.887us         5.75%      13.887us       1.543us           0 B           0 B           0 B           0 B             9                          [[3], [3], []]  
                                               aten::to         0.28%      15.331us         4.88%     267.636us      44.606us       0.000us         0.00%      10.686us       1.781us           0 B           0 B       3.00 KB           0 B             6                   [[3], [], [], [], []]  
void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us       9.280us         3.84%       9.280us       3.093us           0 B           0 B           0 B           0 B             3                                      []  
                                              aten::mul         1.19%      65.316us         1.84%     100.973us      16.829us       8.831us         3.66%       8.831us       1.472us           0 B           0 B       1.67 MB       1.67 MB             6                        [[2, 36512], []]  
                                            aten::zeros         0.95%      52.251us         4.25%     233.156us      38.859us       0.000us         0.00%       6.656us       1.109us           0 B           0 B       1.67 MB           0 B             6                    [[], [], [], [], []]  
                                            aten::zero_         0.68%      37.478us         2.56%     140.288us      23.381us       0.000us         0.00%       6.656us       1.109us           0 B           0 B           0 B           0 B             6                            [[2, 36512]]  
                                            aten::fill_         0.80%      44.000us         1.88%     102.810us      17.135us       6.656us         2.76%       6.656us       1.109us           0 B           0 B           0 B           0 B             6                        [[2, 36512], []]  
void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us       6.656us         2.76%       6.656us       1.109us           0 B           0 B           0 B           0 B             6                                      []  
                                              aten::cat         1.23%      67.650us         1.79%      98.026us      32.675us       6.400us         2.65%       6.400us       2.133us           0 B           0 B       1.50 KB       1.50 KB             3                                [[], []]  
void at::native::(anonymous namespace)::CatArrayBatc...         0.00%       0.000us         0.00%       0.000us       0.000us       6.400us         2.65%       6.400us       2.133us           0 B           0 B           0 B           0 B             3                                      []  
void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       5.664us         2.35%       5.664us       1.888us           0 B           0 B           0 B           0 B             3                                      []  
                                             aten::item         0.49%      26.795us         3.52%     193.141us      12.876us       0.000us         0.00%       5.504us       0.367us           0 B           0 B           0 B           0 B            15                                    [[]]  
                              aten::_local_scalar_dense         1.08%      59.275us         3.03%     166.346us      11.090us       5.504us         2.28%       5.504us       0.367us           0 B           0 B           0 B           0 B            15                                    [[]]  
                         Memcpy DtoH (Device -> Pinned)         0.00%       0.000us         0.00%       0.000us       0.000us       5.504us         2.28%       5.504us       0.917us           0 B           0 B           0 B           0 B             6                                      []  
                                             aten::flip         1.02%      55.663us         2.09%     114.806us      38.269us       5.152us         2.13%       5.152us       1.717us           0 B           0 B       1.50 KB           0 B             3                            [[2, 2], []]  
void at::native::index_elementwise_kernel<128, 4, at...         0.00%       0.000us         0.00%       0.000us       0.000us       5.152us         2.13%       5.152us       1.717us           0 B           0 B           0 B           0 B             3                                      []  
                                              aten::add         0.66%      36.359us         0.98%      53.568us      17.856us       4.993us         2.07%       4.993us       1.664us           0 B           0 B     856.50 KB     856.50 KB             3            [[2, 36512], [2, 36512], []]  
torchfx::forcing_kernel(double const*, double const*...         0.00%       0.000us         0.00%       0.000us       0.000us       4.736us         1.96%       4.736us       1.579us           0 B           0 B           0 B           0 B             3                                      []  
                                               aten::to         0.10%       5.624us         1.97%     107.801us      35.934us       0.000us         0.00%       3.201us       1.067us          72 B           0 B           0 B           0 B             3       [[3], [], [], [], [], [], [], []]  
                       Memcpy DtoH (Device -> Pageable)         0.00%       0.000us         0.00%       0.000us       0.000us       3.201us         1.33%       3.201us       1.067us           0 B           0 B           0 B           0 B             3                                      []  
                                    aten::empty_strided         2.32%     127.265us         2.32%     127.265us       7.070us       0.000us         0.00%       0.000us       0.000us          72 B          72 B      40.50 KB      40.50 KB            18                [[], [], [], [], [], []]  
                                       cudaLaunchKernel        11.29%     618.971us        11.29%     618.971us       7.936us       0.000us         0.00%       0.000us       0.000us           0 B           0 B           0 B           0 B            78                                      []  
                                           aten::select         1.26%      69.130us         1.54%      84.226us       5.615us       0.000us         0.00%       0.000us       0.000us           0 B           0 B           0 B           0 B            15                           [[3], [], []]  
                                       aten::as_strided         0.28%      15.096us         0.28%      15.096us       1.006us       0.000us         0.00%       0.000us       0.000us           0 B           0 B           0 B           0 B            15                       [[3], [], [], []]  
                                        cudaMemcpyAsync         2.31%     126.367us         2.31%     126.367us      14.041us       0.000us         0.00%       0.000us       0.000us           0 B           0 B           0 B           0 B             9                                      []  
                                  cudaStreamSynchronize         0.65%      35.859us         0.65%      35.859us       3.984us       0.000us         0.00%       0.000us       0.000us           0 B           0 B           0 B           0 B             9                                      []  
                                           aten::detach         0.12%       6.353us         0.36%      19.758us       6.586us       0.000us         0.00%       0.000us       0.000us           0 B           0 B           0 B           0 B             3                                   [[3]]  
                                                 detach         0.24%      13.405us         0.24%      13.405us       4.468us       0.000us         0.00%       0.000us       0.000us           0 B           0 B           0 B           0 B             3                                   [[3]]  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  --------------------------------------  
Self CPU time total: 5.482ms
Self CUDA time total: 241.372us
