Source code for cobra.stats.stats
#cobra.stats.py: A place to warehouse generic statistical functions and
#to abstract away more of the interface to R and other statistical tools
from numpy import mean, array, log10, isinf, sign, sum
from scipy import log
from scipy.stats import t, chi2, norm
[docs]def p_adjust(the_p_values, correction_method='bh'):
"""Adjusts the p-values in a list for multiple hypothesis testing.
the_p_values: a list of p-values
correction_method: String. 'bh'|'by'|'bonferroni'. 'bh' for
Benjamini & Hochberg, 'by' for Benjamini & Yekuieli, and
'bonferroni' for Bonferroni.
NOTE: 'bh' and 'by' require R and rpy2 installed
"""
correction_method = correction_method.lower()
if correction_method == 'bh' or correction_method == 'by':
correction_method = correction_method.upper()
if correction_method == 'bonferroni':
the_p_values = [min(1.0, x * float(len(the_p_values))) for x in the_p_values]
else:
#BUG: This may need to be reworked for the jython set up.
try:
from cobra.stats import r
except Exception as e:
print "Couldn't load cobra.r perhaps no rpy2 modules:%s'"%e
the_p_values = [float(x)
for x in list(array(r.adjust_p(array(the_p_values),
correction_method)))]
return the_p_values
[docs]def combine_p_values(the_p_values, method='z', default_quantile=7.):
"""Combines p-values from repeat measurements into a single
p-value.
the_p_values: a list of p-values.
method: String. 'z'|'fisher'. 'z' for using the weighted z-score.
'fisher' for using fisher's combined probability test.
default_quantile: Float. Only used for z method. The quantile to
use when the software's normal inverse cdf(p-value) is infinite
"""
if len(the_p_values) == 1 or sum(the_p_values) == 0:
combined_p_value = the_p_values[0]
elif method.lower() == 'z':
#combine p-values using weighted z-score. To not deal with inifinite
#values replace
the_quantiles = []
for the_p in the_p_values:
the_quantile = norm.ppf(1.-the_p)
if isinf(the_quantile):
the_quantile = default_quantile
the_quantiles.append(the_quantile)
combined_p_value = norm.sf(sum(the_quantiles) / len(the_quantiles)**0.5)
elif method.lower() == 'fisher':
combined_p_value = 1-chi2.cdf(-2*sum(map(log,
the_p_values)),
2*len(the_p_values))
return combined_p_value
[docs]def error_weighted(the_means, the_stds):
"""Calculate the error-weighted mean and standard deviation.
the_means: A list or numpy array of floats.
the_stds: A list or numpy array of floats.
"""
#Allow the input to be a list or an array
mean_list = array(the_means).tolist()
std_list = array(the_stds).tolist()
if len(mean_list) == 1:
return (mean_list[0], std_list[0])
the_variances = array(map(float, the_stds))**2
weighted_std = (1/sum(1/the_variances))**0.5
weighted_mean = sum(array(the_means)/the_variances) / sum(1/the_variances)
return (weighted_mean, weighted_std)