1from typing import Tuple, Dict
2
3import numpy as np
4from pandas import DataFrame, Series
5
6from spotify_confidence.analysis.constants import CI_LOWER, CI_UPPER, SFX1, SFX2, BOOTSTRAPS, INTERVAL_SIZE
7
8
def point_estimate(df: DataFrame, arg_dict: Dict[str, str]) -> float:
    """Compute the mean of every entry in the bootstrap-samples column.

    Args:
        df: Data indexed by the column name stored under ``arg_dict[BOOTSTRAPS]``.
        arg_dict: Lookup of configuration values; ``BOOTSTRAPS`` must be a key.

    Returns:
        The result of mapping ``.mean()`` over the selected column.
        NOTE(review): the signature says ``float``, but the mapped object is
        returned as-is (presumably a Series) -- confirm against callers.
    """
    samples_col = arg_dict[BOOTSTRAPS]

    def _mean_of(draws):
        # Each entry is expected to expose ``.mean()`` (e.g. a numpy array).
        return draws.mean()

    return df[samples_col].map(_mean_of)
12
13
def variance(df: Series, arg_dict: Dict[str, str]) -> float:
    """Compute the variance of every entry in the bootstrap-samples column.

    Args:
        df: Data indexed by the column name stored under ``arg_dict[BOOTSTRAPS]``.
        arg_dict: Lookup of configuration values; ``BOOTSTRAPS`` must be a key.

    Returns:
        The result of mapping ``.var()`` over the selected column.
        NOTE(review): annotated as ``float``, but the mapped object is
        returned as-is -- confirm against callers.

    Raises:
        ValueError: If any computed variance is below zero.
    """
    samples_col = arg_dict[BOOTSTRAPS]

    def _var_of(draws):
        return draws.var()

    per_entry_variance = df[samples_col].map(_var_of)

    # Sanity guard: reject results that contain a negative variance.
    has_negative = (per_entry_variance < 0).any()
    if has_negative:
        raise ValueError("Computed variance is negative. Please check your inputs.")
    return per_entry_variance
21
22
def std_err(row: Series, arg_dict: Dict[str, str]) -> float:
    """Placeholder: no standard error is computed here.

    Both arguments are accepted but ignored.

    Returns:
        Always ``None``.
    """
    # Intentionally a stub; neither ``row`` nor ``arg_dict`` is read.
    return None
25
26
def add_point_estimate_ci(df: DataFrame, arg_dict: Dict[str, str]) -> Series:
    """Attach percentile-based interval bounds for the bootstrap samples.

    For every entry of the bootstrap-samples column, the lower bound is the
    ``100 * (1 - interval_size) / 2`` percentile and the upper bound is the
    ``100 * (1 - (1 - interval_size) / 2)`` percentile of that entry.

    Args:
        df: Data holding the bootstrap-samples column. It is modified in
            place: the ``CI_LOWER`` and ``CI_UPPER`` columns are (re)assigned.
        arg_dict: Lookup of configuration values; ``BOOTSTRAPS`` and
            ``INTERVAL_SIZE`` must be keys.

    Returns:
        The same ``df`` object that was passed in, with both columns set.
    """
    samples_col = arg_dict[BOOTSTRAPS]
    coverage = arg_dict[INTERVAL_SIZE]

    def _lower_bound(draws):
        return np.percentile(draws, 100 * (1 - coverage) / 2)

    def _upper_bound(draws):
        return np.percentile(draws, 100 * (1 - (1 - coverage) / 2))

    # Lower is assigned before upper so new columns appear in that order.
    df[CI_LOWER] = df[samples_col].map(_lower_bound)
    df[CI_UPPER] = df[samples_col].map(_upper_bound)
    return df
33
34
def p_value(row, arg_dict: Dict[str, str]) -> float:
    """Placeholder: no p-value is computed here.

    Both arguments are accepted but ignored.

    Returns:
        Always the sentinel ``-1``.
    """
    sentinel = -1  # fixed placeholder value; inputs are never inspected
    return sentinel
37
38
def ci(df, alpha_column: str, arg_dict: Dict[str, str]) -> Tuple[Series, Series]:
    """Compute row-wise percentile bounds for a difference of bootstrap samples.

    For each row, the entry in the ``<bootstraps> + SFX1`` column is
    subtracted from the entry in the ``<bootstraps> + SFX2`` column, and two
    percentiles of that difference are taken: ``100 * alpha / 2`` (lower) and
    ``100 * (1 - alpha / 2)`` (upper), where ``alpha`` is the row's value in
    ``alpha_column``.

    Args:
        df: Data holding both suffixed bootstrap columns and ``alpha_column``.
        alpha_column: Name of the column providing each row's alpha.
        arg_dict: Lookup of configuration values; ``BOOTSTRAPS`` must be a key.

    Returns:
        A ``(lower, upper)`` pair, each the result of a row-wise ``df.apply``.
    """
    samples_col = arg_dict[BOOTSTRAPS]

    def _difference_percentile(row, upper_tail):
        # Difference is second-suffix minus first-suffix, as in both bounds.
        difference = row[samples_col + SFX2] - row[samples_col + SFX1]
        if upper_tail:
            q = 100 * (1 - row[alpha_column] / 2)
        else:
            q = 100 * row[alpha_column] / 2
        return np.percentile(difference, q)

    def _lower_for(row):
        return _difference_percentile(row, upper_tail=False)

    def _upper_for(row):
        return _difference_percentile(row, upper_tail=True)

    lower = df.apply(_lower_for, axis=1)
    upper = df.apply(_upper_for, axis=1)
    return lower, upper
54
55
def achieved_power(df: DataFrame, mde: float, alpha: float) -> DataFrame:
    """Placeholder: no achieved power is computed here.

    All arguments are accepted but ignored.

    Returns:
        Always ``None``.
    """
    # Intentionally a stub; ``df``, ``mde`` and ``alpha`` are never read.
    return None