Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# -*- coding: utf-8 -*- 

2"""Base classes for statistical test results 

3 

4Created on Mon Apr 22 14:03:21 2013 

5 

6Author: Josef Perktold 

7""" 

8from statsmodels.compat.python import lzip 

9import numpy as np 

10 

11 

class AllPairsResults(object):
    '''Results class for pairwise comparisons, based on p-values

    Parameters
    ----------
    pvals_raw : array_like, 1-D
        p-values from a pairwise comparison test
    all_pairs : list of tuples
        list of indices, one pair for each comparison
    multitest_method : str
        method that is used by default for p-value correction. It is used
        by the methods of this class whenever the multiple-testing method
        is not specified as an argument.
    levels : {list[str], None}
        optional names of the levels or groups
    n_levels : None or int
        If None, then the number of levels or groups is inferred from the
        other arguments. It can be explicitly specified, if the inferred
        number is incorrect.

    Notes
    -----
    This class can also be used for other pairwise comparisons, for example
    comparing several treatments to a control (as in Dunnett's test).

    '''

    def __init__(self, pvals_raw, all_pairs, multitest_method='hs',
                 levels=None, n_levels=None):
        self.pvals_raw = pvals_raw
        self.all_pairs = all_pairs
        if n_levels is None:
            # Pairs hold 0-based level indices, so the largest index that
            # occurs in any pair determines the number of levels.
            self.n_levels = np.max(all_pairs) + 1
        else:
            self.n_levels = n_levels

        self.multitest_method = multitest_method
        self.levels = levels
        if levels is None:
            # Without level names, label each comparison by the repr of
            # its index pair.
            self.all_pairs_names = ['%r' % (pairs,) for pairs in all_pairs]
        else:
            self.all_pairs_names = ['%s-%s' % (levels[pairs[0]],
                                               levels[pairs[1]])
                                    for pairs in all_pairs]

    def pval_corrected(self, method=None):
        '''p-values corrected for multiple testing problem

        This uses the default p-value correction of the instance stored in
        ``self.multitest_method`` if method is None.

        Parameters
        ----------
        method : {str, None}
            name of the correction, passed on to
            ``statsmodels.stats.multitest.multipletests``.

        Returns
        -------
        pvals_corrected : ndarray
            second element of the ``multipletests`` result for
            ``self.pvals_raw``, one entry per comparison.

        '''
        # Imported lazily, as in the rest of this class.
        import statsmodels.stats.multitest as smt
        if method is None:
            method = self.multitest_method
        # TODO: breaks with method=None
        return smt.multipletests(self.pvals_raw, method=method)[1]

    def __str__(self):
        return self.summary()

    def pval_table(self):
        '''create a (n_levels, n_levels) array with corrected p_values

        Only the ``(i, j)`` positions listed in ``self.all_pairs`` are
        filled; every other entry is left at zero.

        this needs to improve, similar to R pairwise output

        Returns
        -------
        pvals_mat : ndarray, shape (n_levels, n_levels)
            corrected p-values placed at the positions of the pairs.

        '''
        k = self.n_levels
        pvals_mat = np.zeros((k, k))
        # if we do not assume we have all pairs
        # The index must be a *tuple* (rows, cols). A list of sequences is
        # interpreted by current NumPy as a single integer array indexing
        # the first axis only, which fills whole rows or fails to broadcast.
        pair_index = tuple(zip(*self.all_pairs))
        pvals_mat[pair_index] = self.pval_corrected()
        return pvals_mat

    def summary(self):
        '''returns text summarizing the results

        uses the default pvalue correction of the instance stored in
        ``self.multitest_method``

        Returns
        -------
        text : str
            header naming the correction method, followed by one line per
            comparison with the pair label and its corrected p-value.

        '''
        import statsmodels.stats.multitest as smt
        # Width of the longest pair label, used to pad the column header.
        maxlevel = max((len(ss) for ss in self.all_pairs_names))

        text = ('Corrected p-values using %s p-value correction\n\n'
                % smt.multitest_methods_names[self.multitest_method])
        text += 'Pairs' + (' ' * (maxlevel - 5 + 1)) + 'p-values\n'
        text += '\n'.join(('%s  %6.4g' % (pairs, pv) for (pairs, pv) in
                           zip(self.all_pairs_names, self.pval_corrected())))
        return text

99 return text