Coverage for /Users/sebastiana/Documents/Sugarpills/confidence/spotify_confidence/analysis/frequentist/confidence_computers/chi_squared_computer.py: 30%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

43 statements  

1from typing import Tuple, Dict 

2 

3import numpy as np 

4from pandas import DataFrame, Series 

5from statsmodels.stats.proportion import proportion_confint, proportions_chisquare, confint_proportions_2indep 

6 

7from spotify_confidence.analysis.confidence_utils import power_calculation 

8from spotify_confidence.analysis.constants import ( 

9 NUMERATOR, 

10 DENOMINATOR, 

11 INTERVAL_SIZE, 

12 POINT_ESTIMATE, 

13 VARIANCE, 

14 CI_LOWER, 

15 CI_UPPER, 

16 SFX1, 

17 SFX2, 

18) 

19 

20 

def point_estimate(df: DataFrame, arg_dict: Dict[str, str]) -> Series:
    """Compute the per-row proportion numerator / denominator.

    Args:
        df: Frame holding the numerator and denominator columns.
        arg_dict: Mapping that provides the column names under the
            NUMERATOR and DENOMINATOR keys.

    Returns:
        The element-wise ratio of the numerator column to the denominator
        column (one value per row of ``df``).

    Raises:
        ValueError: If any value in the denominator column equals 0.
    """
    numerator = arg_dict[NUMERATOR]
    denominator = arg_dict[DENOMINATOR]
    # Fail loudly instead of letting the division produce inf/NaN rows.
    if (df[denominator] == 0).any():
        raise ValueError("""Can't compute point estimate: denominator is 0""")
    return df[numerator] / df[denominator]

27 

28 

def variance(df: DataFrame, arg_dict: Dict[str, str]) -> Series:
    """Compute p * (1 - p) for every row, where p is the POINT_ESTIMATE column.

    Args:
        df: Frame that already contains the POINT_ESTIMATE column.
        arg_dict: Not read by this function; kept in the signature.

    Returns:
        The per-row value of ``p * (1 - p)``.

    Raises:
        ValueError: If any computed value is negative (which happens when a
            point estimate lies outside the interval [0, 1]).
    """
    proportion = df[POINT_ESTIMATE]
    variance = proportion * (1 - proportion)

    has_negative = (variance < 0).any()
    if has_negative:
        raise ValueError(f"Computed variance is negative: {variance}. " "Please check your inputs.")

    return variance

34 

35 

def std_err(df: DataFrame, arg_dict: Dict[str, str]) -> Series:
    """Combine the two groups' variance / sample-size terms into a standard error.

    Args:
        df: Frame containing, for each of the two suffixes SFX1 and SFX2, a
            VARIANCE column and a denominator column.
        arg_dict: Mapping that provides the denominator column name under
            the DENOMINATOR key.

    Returns:
        ``sqrt(var_1 / n_1 + var_2 / n_2)`` evaluated row by row.
    """
    denominator = arg_dict[DENOMINATOR]

    # One variance-over-sample-size term per compared group.
    first_term = df[VARIANCE + SFX1] / df[denominator + SFX1]
    second_term = df[VARIANCE + SFX2] / df[denominator + SFX2]

    return np.sqrt(first_term + second_term)

39 

40 

def add_point_estimate_ci(df: DataFrame, arg_dict: Dict[str, str]) -> DataFrame:
    """Attach confidence-interval bounds for each row's proportion to ``df``.

    The bounds come from statsmodels' ``proportion_confint`` called with its
    default method.

    Args:
        df: Frame holding the numerator and denominator columns. It is
            modified in place: the CI_LOWER and CI_UPPER columns are written.
        arg_dict: Mapping that provides the column names under NUMERATOR and
            DENOMINATOR, and the interval size under INTERVAL_SIZE. The
            interval size is subtracted from 1, so it must be numeric.

    Returns:
        The same ``df`` object, now including the CI_LOWER and CI_UPPER
        columns.
    """
    numerator = arg_dict[NUMERATOR]
    denominator = arg_dict[DENOMINATOR]
    interval_size = arg_dict[INTERVAL_SIZE]

    lower, upper = proportion_confint(
        count=df[numerator],
        nobs=df[denominator],
        # statsmodels expects the significance level, not the coverage.
        alpha=1 - interval_size,
    )
    df[CI_LOWER] = lower
    df[CI_UPPER] = upper
    return df

51 

52 

def p_value(df: DataFrame, arg_dict: Dict[str, str]) -> Series:
    """Run a chi-square test of proportions on every row and return the p-values.

    Args:
        df: Frame containing numerator and denominator columns for both
            groups (column names carry the SFX1 / SFX2 suffixes).
        arg_dict: Mapping that provides the base column names under the
            NUMERATOR and DENOMINATOR keys.

    Returns:
        The result of applying the row-wise test over ``df``: the p-value
        reported by ``proportions_chisquare`` for each row.
    """
    numerator, denominator = arg_dict[NUMERATOR], arg_dict[DENOMINATOR]
    count_columns = (numerator + SFX1, numerator + SFX2)
    nobs_columns = (denominator + SFX1, denominator + SFX2)

    def _row_p_value(row):
        # proportions_chisquare returns a 3-tuple; only the middle element
        # (the p-value) is kept.
        _, pval, _ = proportions_chisquare(
            count=[row[count_columns[0]], row[count_columns[1]]],
            nobs=[row[nobs_columns[0]], row[nobs_columns[1]]],
        )
        return pval

    return df.apply(_row_p_value, axis=1)

65 

66 

def ci(df: DataFrame, alpha_column: str, arg_dict: Dict[str, str]) -> Tuple[Series, Series]:
    """Compute a Wald confidence interval for the difference in proportions.

    Args:
        df: Frame containing numerator and denominator columns for both
            groups (column names carry the SFX1 / SFX2 suffixes) and the
            column named by ``alpha_column``.
        alpha_column: Name of the column whose values are passed as
            ``alpha`` to ``confint_proportions_2indep``.
        arg_dict: Mapping that provides the base column names under the
            NUMERATOR and DENOMINATOR keys.

    Returns:
        Whatever ``confint_proportions_2indep`` returns for these inputs
        (annotated as a pair of Series: lower and upper bounds).
    """
    numerator, denominator = arg_dict[NUMERATOR], arg_dict[DENOMINATOR]

    # The SFX2 group is deliberately passed as sample 1 and the SFX1 group as
    # sample 2, exactly as in the original call.
    # NOTE(review): per the statsmodels docs this makes the interval one for
    # (SFX2 proportion - SFX1 proportion) -- confirm against callers.
    return confint_proportions_2indep(
        count1=df[numerator + SFX2],
        nobs1=df[denominator + SFX2],
        count2=df[numerator + SFX1],
        nobs2=df[denominator + SFX1],
        alpha=df[alpha_column],
        compare="diff",
        method="wald",
    )

79 

80 

def achieved_power(df: DataFrame, mde: float, alpha: float, arg_dict: Dict[str, str]) -> DataFrame:
    """Delegate to ``power_calculation`` using the pooled-proportion variance.

    Args:
        df: Frame containing numerator and denominator columns for both
            groups (column names carry the SFX1 / SFX2 suffixes).
        mde: Forwarded unchanged as the first argument of
            ``power_calculation``.
        alpha: Forwarded unchanged as the third argument of
            ``power_calculation``.
        arg_dict: Mapping that provides the base column names under the
            NUMERATOR and DENOMINATOR keys.

    Returns:
        The value returned by ``power_calculation``.
        NOTE(review): annotated as DataFrame; the helper is defined
        elsewhere, so confirm its actual return type.
    """
    numerator, denominator = arg_dict[NUMERATOR], arg_dict[DENOMINATOR]

    total_successes = df[numerator + SFX1] + df[numerator + SFX2]
    total_observations = df[denominator + SFX1] + df[denominator + SFX2]

    # Pool both groups into a single proportion, then take p * (1 - p).
    pooled_prop = total_successes / total_observations
    var_pooled = pooled_prop * (1 - pooled_prop)

    return power_calculation(
        mde,
        var_pooled,
        alpha,
        df[denominator + SFX1],
        df[denominator + SFX2],
    )