LICENSE
README.md
setup.py
fla/__init__.py
fla/utils.py
fla/layers/__init__.py
fla/layers/abc.py
fla/layers/attn.py
fla/layers/based.py
fla/layers/bitattn.py
fla/layers/delta_net.py
fla/layers/gla.py
fla/layers/gsa.py
fla/layers/hgrn.py
fla/layers/hgrn2.py
fla/layers/linear_attn.py
fla/layers/multiscale_retention.py
fla/layers/rebased.py
fla/layers/rwkv6.py
fla/layers/simple_gla.py
fla/models/__init__.py
fla/models/utils.py
fla/models/abc/__init__.py
fla/models/abc/configuration_abc.py
fla/models/abc/modeling_abc.py
fla/models/bitnet/__init__.py
fla/models/bitnet/configuration_bitnet.py
fla/models/bitnet/modeling_bitnet.py
fla/models/delta_net/__init__.py
fla/models/delta_net/configuration_delta_net.py
fla/models/delta_net/modeling_delta_net.py
fla/models/gla/__init__.py
fla/models/gla/configuration_gla.py
fla/models/gla/modeling_gla.py
fla/models/gsa/__init__.py
fla/models/gsa/configuration_gsa.py
fla/models/gsa/modeling_gsa.py
fla/models/hgrn/__init__.py
fla/models/hgrn/configuration_hgrn.py
fla/models/hgrn/modeling_hgrn.py
fla/models/hgrn2/__init__.py
fla/models/hgrn2/configuration_hgrn2.py
fla/models/hgrn2/modeling_hgrn2.py
fla/models/linear_attn/__init__.py
fla/models/linear_attn/configuration_linear_attn.py
fla/models/linear_attn/modeling_linear_attn.py
fla/models/mamba/__init__.py
fla/models/mamba/configuration_mamba.py
fla/models/mamba/modeling_mamba.py
fla/models/mamba2/__init__.py
fla/models/mamba2/configuration_mamba2.py
fla/models/mamba2/modeling_mamba2.py
fla/models/retnet/__init__.py
fla/models/retnet/configuration_retnet.py
fla/models/retnet/modeling_retnet.py
fla/models/rwkv6/__init__.py
fla/models/rwkv6/configuration_rwkv6.py
fla/models/rwkv6/modeling_rwkv6.py
fla/models/samba/__init__.py
fla/models/samba/configuration_samba.py
fla/models/samba/modeling_samba.py
fla/models/transformer/__init__.py
fla/models/transformer/configuration_transformer.py
fla/models/transformer/modeling_transformer.py
fla/modules/__init__.py
fla/modules/activations.py
fla/modules/convolution.py
fla/modules/feature_map.py
fla/modules/fused_bitlinear.py
fla/modules/fused_cross_entropy.py
fla/modules/fused_kl_div.py
fla/modules/fused_linear_cross_entropy.py
fla/modules/fused_norm_gate.py
fla/modules/l2norm.py
fla/modules/layernorm.py
fla/modules/layernorm_gated.py
fla/modules/rotary.py
fla/ops/__init__.py
fla/ops/rotary.py
fla/ops/abc/__init__.py
fla/ops/abc/chunk.py
fla/ops/abc/naive.py
fla/ops/based/__init__.py
fla/ops/based/fused_chunk.py
fla/ops/based/naive.py
fla/ops/based/parallel.py
fla/ops/common/__init__.py
fla/ops/common/chunk_h.py
fla/ops/common/fused_recurrent.py
fla/ops/delta_rule/__init__.py
fla/ops/delta_rule/chunk.py
fla/ops/delta_rule/fused_chunk.py
fla/ops/delta_rule/fused_recurrent.py
fla/ops/delta_rule/naive.py
fla/ops/delta_rule/parallel.py
fla/ops/delta_rule/wy_fast.py
fla/ops/generalized_delta_rule/__init__.py
fla/ops/generalized_delta_rule/iplr/__init__.py
fla/ops/generalized_delta_rule/iplr/fused_recurrent.py
fla/ops/generalized_delta_rule/iplr/naive.py
fla/ops/gla/__init__.py
fla/ops/gla/chunk.py
fla/ops/gla/fused_chunk.py
fla/ops/gla/fused_recurrent.py
fla/ops/gla/naive.py
fla/ops/gsa/__init__.py
fla/ops/gsa/chunk.py
fla/ops/gsa/fused_recurrent.py
fla/ops/gsa/naive.py
fla/ops/hgrn/__init__.py
fla/ops/hgrn/chunk.py
fla/ops/hgrn/fused_recurrent.py
fla/ops/hgrn/naive.py
fla/ops/linear_attn/__init__.py
fla/ops/linear_attn/chunk.py
fla/ops/linear_attn/fused_chunk.py
fla/ops/linear_attn/fused_recurrent.py
fla/ops/linear_attn/naive.py
fla/ops/linear_attn/utils.py
fla/ops/rebased/__init__.py
fla/ops/rebased/naive.py
fla/ops/rebased/parallel.py
fla/ops/retention/__init__.py
fla/ops/retention/chunk.py
fla/ops/retention/fused_chunk.py
fla/ops/retention/fused_recurrent.py
fla/ops/retention/naive.py
fla/ops/retention/parallel.py
fla/ops/rwkv4/__init__.py
fla/ops/rwkv4/fused_recurrent.py
fla/ops/rwkv6/__init__.py
fla/ops/rwkv6/chunk.py
fla/ops/rwkv6/chunk_naive.py
fla/ops/rwkv6/fused_recurrent.py
fla/ops/rwkv6/recurrent_naive.py
fla/ops/rwkv7/__init__.py
fla/ops/rwkv7/channel_mixing.py
fla/ops/rwkv7/chunk_naive.py
fla/ops/rwkv7/fused_recurrent.py
fla/ops/rwkv7/recurrent_naive.py
fla/ops/simple_gla/__init__.py
fla/ops/simple_gla/chunk.py
fla/ops/simple_gla/fused_recurrent.py
fla/ops/simple_gla/naive.py
fla/ops/simple_gla/parallel.py
fla/ops/utils/__init__.py
fla/ops/utils/cumsum.py
fla/ops/utils/logcumsumexp.py
fla/ops/utils/logsumexp.py
fla/ops/utils/matmul.py
fla/ops/utils/softmax.py
rwkv_fla.egg-info/PKG-INFO
rwkv_fla.egg-info/SOURCES.txt
rwkv_fla.egg-info/dependency_links.txt
rwkv_fla.egg-info/requires.txt
rwkv_fla.egg-info/top_level.txt
rwkvfla/__init__.py
rwkvfla/utils.py
rwkvfla/layers/__init__.py
rwkvfla/layers/abc.py
rwkvfla/layers/attn.py
rwkvfla/layers/based.py
rwkvfla/layers/bitattn.py
rwkvfla/layers/delta_net.py
rwkvfla/layers/gla.py
rwkvfla/layers/gsa.py
rwkvfla/layers/hgrn.py
rwkvfla/layers/hgrn2.py
rwkvfla/layers/linear_attn.py
rwkvfla/layers/multiscale_retention.py
rwkvfla/layers/rebased.py
rwkvfla/layers/rwkv6.py
rwkvfla/layers/simple_gla.py
rwkvfla/models/__init__.py
rwkvfla/models/utils.py
rwkvfla/models/abc/__init__.py
rwkvfla/models/abc/configuration_abc.py
rwkvfla/models/abc/modeling_abc.py
rwkvfla/models/bitnet/__init__.py
rwkvfla/models/bitnet/configuration_bitnet.py
rwkvfla/models/bitnet/modeling_bitnet.py
rwkvfla/models/delta_net/__init__.py
rwkvfla/models/delta_net/configuration_delta_net.py
rwkvfla/models/delta_net/modeling_delta_net.py
rwkvfla/models/gla/__init__.py
rwkvfla/models/gla/configuration_gla.py
rwkvfla/models/gla/modeling_gla.py
rwkvfla/models/gsa/__init__.py
rwkvfla/models/gsa/configuration_gsa.py
rwkvfla/models/gsa/modeling_gsa.py
rwkvfla/models/hgrn/__init__.py
rwkvfla/models/hgrn/configuration_hgrn.py
rwkvfla/models/hgrn/modeling_hgrn.py
rwkvfla/models/hgrn2/__init__.py
rwkvfla/models/hgrn2/configuration_hgrn2.py
rwkvfla/models/hgrn2/modeling_hgrn2.py
rwkvfla/models/linear_attn/__init__.py
rwkvfla/models/linear_attn/configuration_linear_attn.py
rwkvfla/models/linear_attn/modeling_linear_attn.py
rwkvfla/models/mamba/__init__.py
rwkvfla/models/mamba/configuration_mamba.py
rwkvfla/models/mamba/modeling_mamba.py
rwkvfla/models/mamba2/__init__.py
rwkvfla/models/mamba2/configuration_mamba2.py
rwkvfla/models/mamba2/modeling_mamba2.py
rwkvfla/models/retnet/__init__.py
rwkvfla/models/retnet/configuration_retnet.py
rwkvfla/models/retnet/modeling_retnet.py
rwkvfla/models/rwkv6/__init__.py
rwkvfla/models/rwkv6/configuration_rwkv6.py
rwkvfla/models/rwkv6/modeling_rwkv6.py
rwkvfla/models/samba/__init__.py
rwkvfla/models/samba/configuration_samba.py
rwkvfla/models/samba/modeling_samba.py
rwkvfla/models/transformer/__init__.py
rwkvfla/models/transformer/configuration_transformer.py
rwkvfla/models/transformer/modeling_transformer.py
rwkvfla/modules/__init__.py
rwkvfla/modules/activations.py
rwkvfla/modules/convolution.py
rwkvfla/modules/feature_map.py
rwkvfla/modules/fused_bitlinear.py
rwkvfla/modules/fused_cross_entropy.py
rwkvfla/modules/fused_kl_div.py
rwkvfla/modules/fused_linear_cross_entropy.py
rwkvfla/modules/fused_norm_gate.py
rwkvfla/modules/l2norm.py
rwkvfla/modules/layernorm.py
rwkvfla/modules/layernorm_gated.py
rwkvfla/modules/rotary.py
rwkvfla/ops/__init__.py
rwkvfla/ops/rotary.py
rwkvfla/ops/abc/__init__.py
rwkvfla/ops/abc/chunk.py
rwkvfla/ops/abc/naive.py
rwkvfla/ops/based/__init__.py
rwkvfla/ops/based/fused_chunk.py
rwkvfla/ops/based/naive.py
rwkvfla/ops/based/parallel.py
rwkvfla/ops/common/__init__.py
rwkvfla/ops/common/chunk_h.py
rwkvfla/ops/common/fused_recurrent.py
rwkvfla/ops/delta_rule/__init__.py
rwkvfla/ops/delta_rule/chunk.py
rwkvfla/ops/delta_rule/fused_chunk.py
rwkvfla/ops/delta_rule/fused_recurrent.py
rwkvfla/ops/delta_rule/naive.py
rwkvfla/ops/delta_rule/parallel.py
rwkvfla/ops/delta_rule/wy_fast.py
rwkvfla/ops/generalized_delta_rule/__init__.py
rwkvfla/ops/generalized_delta_rule/iplr/__init__.py
rwkvfla/ops/generalized_delta_rule/iplr/fused_recurrent.py
rwkvfla/ops/generalized_delta_rule/iplr/naive.py
rwkvfla/ops/gla/__init__.py
rwkvfla/ops/gla/chunk.py
rwkvfla/ops/gla/fused_chunk.py
rwkvfla/ops/gla/fused_recurrent.py
rwkvfla/ops/gla/naive.py
rwkvfla/ops/gsa/__init__.py
rwkvfla/ops/gsa/chunk.py
rwkvfla/ops/gsa/fused_recurrent.py
rwkvfla/ops/gsa/naive.py
rwkvfla/ops/hgrn/__init__.py
rwkvfla/ops/hgrn/chunk.py
rwkvfla/ops/hgrn/fused_recurrent.py
rwkvfla/ops/hgrn/naive.py
rwkvfla/ops/linear_attn/__init__.py
rwkvfla/ops/linear_attn/chunk.py
rwkvfla/ops/linear_attn/fused_chunk.py
rwkvfla/ops/linear_attn/fused_recurrent.py
rwkvfla/ops/linear_attn/naive.py
rwkvfla/ops/linear_attn/utils.py
rwkvfla/ops/rebased/__init__.py
rwkvfla/ops/rebased/naive.py
rwkvfla/ops/rebased/parallel.py
rwkvfla/ops/retention/__init__.py
rwkvfla/ops/retention/chunk.py
rwkvfla/ops/retention/fused_chunk.py
rwkvfla/ops/retention/fused_recurrent.py
rwkvfla/ops/retention/naive.py
rwkvfla/ops/retention/parallel.py
rwkvfla/ops/rwkv4/__init__.py
rwkvfla/ops/rwkv4/fused_recurrent.py
rwkvfla/ops/rwkv6/__init__.py
rwkvfla/ops/rwkv6/chunk.py
rwkvfla/ops/rwkv6/chunk_naive.py
rwkvfla/ops/rwkv6/fused_recurrent.py
rwkvfla/ops/rwkv6/recurrent_naive.py
rwkvfla/ops/rwkv7/__init__.py
rwkvfla/ops/rwkv7/channel_mixing.py
rwkvfla/ops/rwkv7/chunk_naive.py
rwkvfla/ops/rwkv7/fused_recurrent.py
rwkvfla/ops/rwkv7/recurrent_naive.py
rwkvfla/ops/simple_gla/__init__.py
rwkvfla/ops/simple_gla/chunk.py
rwkvfla/ops/simple_gla/fused_recurrent.py
rwkvfla/ops/simple_gla/naive.py
rwkvfla/ops/simple_gla/parallel.py
rwkvfla/ops/utils/__init__.py
rwkvfla/ops/utils/cumsum.py
rwkvfla/ops/utils/logcumsumexp.py
rwkvfla/ops/utils/logsumexp.py
rwkvfla/ops/utils/matmul.py
rwkvfla/ops/utils/softmax.py
tests/test_fused_chunk.py
tests/test_padding.py