Source code for cocoatree.statistics
from . import position
from . import pairwise
from .. import msa
from ..__params import __freq_regularization_ref
[docs]
def compute_all_frequencies(sequences,
seq_weights=None,
freq_regul=__freq_regularization_ref):
"""
Compute frequencies on sequences
Parameters
----------
sequences : list of sequences
seq_weights : {None, np.ndarray (n_seq)}
if None, will re-compute the sequence weights.
freq_regul : regularization parameter (default=__freq_regularization_ref)
Returns
-------
aa_freqs : np.ndarray (nseq, 21)
A (nseq, 21) ndarray containing the amino acid frequencies at each
positions.
bkgd_freqs : np.ndarray (21, )
A (21,) np.array containing the background amino acid frequencies
at each position; it is computed from the mean frequency of amino acid
a in all proteins in the NCBI non-redundant database
(see Rivoire et al., https://dx.plos.org/10.1371/journal.pcbi.1004817)
aa_joint_freqs : np.ndarray (nseq, nseq, 21, 21)
An ndarray containing the pairwise joint frequencies of amino acids
for each pair of positions in the list of provided sequences.
"""
if seq_weights is None:
seq_weights, _ = msa.compute_seq_weights(sequences)
aa_freqs = position._compute_aa_freqs(
sequences,
freq_regul=freq_regul,
seq_weights=seq_weights)
bkgd_freqs = position._compute_background_freqs(
aa_freqs,
sequences,
seq_weights=seq_weights,
freq_regul=__freq_regularization_ref)
aa_joint_freqs = pairwise._compute_aa_joint_freqs(
sequences,
seq_weights=seq_weights,
freq_regul=freq_regul)
return aa_freqs, bkgd_freqs, aa_joint_freqs