# Copyright 2017-2020 Spotify AB
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Union, Iterable, Tuple, Dict, List

from pandas import DataFrame

from spotify_confidence.analysis.frequentist.confidence_computers.generic_computer import GenericComputer
from .chartify_grapher import ChartifyGrapher
from ..abstract_base_classes.confidence_abc import ConfidenceABC
from ..abstract_base_classes.confidence_computer_abc import ConfidenceComputerABC
from ..abstract_base_classes.confidence_grapher_abc import ConfidenceGrapherABC
from ..confidence_utils import (
    validate_categorical_columns,
    listify,
    get_all_categorical_group_columns,
    get_all_group_columns,
)
from ..constants import BONFERRONI, NIM_TYPE, METHODS
from ..frequentist.sample_ratio_test import sample_ratio_test
from ...chartgrid import ChartGrid


class Experiment(ConfidenceABC):
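    """Frequentist experiment analysis interface.

    Summaries, difference estimates and achieved power are delegated to a
    ``ConfidenceComputerABC`` (``GenericComputer`` unless one is supplied),
    and plotting is delegated to a ``ConfidenceGrapherABC``
    (``ChartifyGrapher`` unless one is supplied). ``method_column`` is
    required, and every value in that column must be one of the methods
    listed in ``METHODS``.

    Example (a minimal sketch with illustrative column names; ``df`` is
    assumed to hold pre-aggregated data and a valid method column)::

        experiment = Experiment(
            data_frame=df,
            numerator_column="conversions",
            numerator_sum_squares_column="conversions",
            denominator_column="exposures",
            categorical_group_columns="group",
            method_column="method",
        )
        experiment.summary()
        experiment.difference(level_1="control", level_2="treatment")
    """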
    def __init__(
        self,
        data_frame: DataFrame,
        numerator_column: str,
        numerator_sum_squares_column: Union[str, None],
        denominator_column: str,
        categorical_group_columns: Union[str, Iterable],
        ordinal_group_column: Union[str, None] = None,
        interval_size: float = 0.95,
        correction_method: str = BONFERRONI,
        confidence_computer: ConfidenceComputerABC = None,
        confidence_grapher: ConfidenceGrapherABC = None,
        method_column: str = None,
        bootstrap_samples_column: str = None,
        metric_column: str = None,
        treatment_column: str = None,
        power: float = 0.8,
        feature_column: str = None,
        feature_sum_squares_column: str = None,
        feature_cross_sum_column: str = None,
    ):
        validate_categorical_columns(categorical_group_columns)
        self._df = data_frame
        self._numerator = numerator_column
        self._numerator_sumsq = numerator_sum_squares_column
        self._denominator = denominator_column
        self._categorical_group_columns = get_all_categorical_group_columns(
            categorical_group_columns, metric_column, treatment_column
        )
        self._ordinal_group_column = ordinal_group_column
        self._metric_column = metric_column
        self._treatment_column = treatment_column
        self._all_group_columns = get_all_group_columns(self._categorical_group_columns, self._ordinal_group_column)
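        # Every row must declare which statistical method applies to it;
        # only values listed in METHODS are accepted.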
        if method_column is None:
            raise ValueError("method column cannot be None")
        if not all(self._df[method_column].map(lambda m: m in METHODS)):
            raise ValueError(f"Values of method column must be in {METHODS}")

        if confidence_computer is not None:
            self._confidence_computer = confidence_computer
        else:
            self._confidence_computer = GenericComputer(
                data_frame=data_frame,
                numerator_column=numerator_column,
                numerator_sum_squares_column=numerator_sum_squares_column,
                denominator_column=denominator_column,
                categorical_group_columns=listify(categorical_group_columns),
                ordinal_group_column=ordinal_group_column,
                interval_size=interval_size,
                correction_method=correction_method.lower(),
                method_column=method_column,
                bootstrap_samples_column=bootstrap_samples_column,
                metric_column=metric_column,
                treatment_column=treatment_column,
                power=power,
                point_estimate_column=None,
                var_column=None,
                is_binary_column=None,
                feature_column=feature_column,
                feature_sum_squares_column=feature_sum_squares_column,
                feature_cross_sum_column=feature_cross_sum_column,
            )

        self._confidence_grapher = (
            confidence_grapher
            if confidence_grapher is not None
            else ChartifyGrapher(
                data_frame=self._df,
                numerator_column=self._numerator,
                denominator_column=self._denominator,
                categorical_group_columns=self._categorical_group_columns,
                ordinal_group_column=self._ordinal_group_column,
            )
        )

    def summary(self, verbose: bool = False) -> DataFrame:
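        """Return point estimates and confidence intervals per group,
        as computed by the underlying confidence computer."""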
        return self._confidence_computer.compute_summary(verbose)

    def difference(
        self,
        level_1: Union[str, Tuple],
        level_2: Union[str, Tuple],
        absolute: bool = True,
        groupby: Union[str, Iterable] = None,
        non_inferiority_margins: NIM_TYPE = None,
        final_expected_sample_size_column: str = None,
        verbose: bool = False,
        minimum_detectable_effects_column: str = None,
    ) -> DataFrame:
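        """Compute the difference between ``level_1`` and ``level_2``,
        optionally per level of ``groupby``. Sequential testing via
        ``final_expected_sample_size_column`` requires the ordinal group
        column to be part of ``groupby``."""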
        self._validate_sequential(final_expected_sample_size_column, groupby)

        return self._confidence_computer.compute_difference(
            level_1,
            level_2,
            absolute,
            groupby,
            non_inferiority_margins,
            final_expected_sample_size_column,
            verbose,
            minimum_detectable_effects_column,
        )

    def differences(
        self,
        levels: Union[Tuple, List[Tuple]],
        absolute: bool = True,
        groupby: Union[str, Iterable] = None,
        non_inferiority_margins: NIM_TYPE = None,
        final_expected_sample_size_column: str = None,
        verbose: bool = False,
        minimum_detectable_effects_column: str = None,
    ) -> DataFrame:
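        """Compute differences for several ``(level_1, level_2)`` pairs in
        one call; otherwise behaves like ``difference``."""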
        self._validate_sequential(final_expected_sample_size_column, groupby)
        return self._confidence_computer.compute_differences(
            levels,
            absolute,
            groupby,
            non_inferiority_margins,
            final_expected_sample_size_column,
            verbose,
            minimum_detectable_effects_column,
        )

    def multiple_difference(
        self,
        level: Union[str, Tuple],
        absolute: bool = True,
        groupby: Union[str, Iterable] = None,
        level_as_reference: bool = None,
        non_inferiority_margins: NIM_TYPE = None,
        final_expected_sample_size_column: str = None,
        verbose: bool = False,
        minimum_detectable_effects_column: str = None,
    ) -> DataFrame:
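        """Compare ``level`` against every other level, with ``level``
        treated as the reference side when ``level_as_reference`` is True."""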
        self._validate_sequential(final_expected_sample_size_column, groupby)

        return self._confidence_computer.compute_multiple_difference(
            level,
            absolute,
            groupby,
            level_as_reference,
            non_inferiority_margins,
            final_expected_sample_size_column,
            verbose,
            minimum_detectable_effects_column,
        )

    def summary_plot(self, groupby: Union[str, Iterable] = None) -> ChartGrid:
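        """Plot the output of ``summary`` as a ChartGrid."""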
        summary_df = self.summary()
        graph = self._confidence_grapher.plot_summary(summary_df, groupby)
        return graph

    def difference_plot(
        self,
        level_1: Union[str, Tuple],
        level_2: Union[str, Tuple],
        absolute: bool = True,
        groupby: Union[str, Iterable] = None,
        non_inferiority_margins: NIM_TYPE = None,
        use_adjusted_intervals: bool = False,
        final_expected_sample_size_column: str = None,
    ) -> ChartGrid:
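        """Plot the difference between ``level_1`` and ``level_2`` with its
        confidence interval."""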
        difference_df = self.difference(
            level_1=level_1,
            level_2=level_2,
            absolute=absolute,
            groupby=groupby,
            non_inferiority_margins=non_inferiority_margins,
            final_expected_sample_size_column=final_expected_sample_size_column,
        )
        chartgrid = self._confidence_grapher.plot_difference(
            difference_df, absolute, groupby, non_inferiority_margins, use_adjusted_intervals
        )
        return chartgrid

    def differences_plot(
        self,
        levels: List[Tuple],
        absolute: bool = True,
        groupby: Union[str, Iterable] = None,
        non_inferiority_margins: NIM_TYPE = None,
        use_adjusted_intervals: bool = False,
        final_expected_sample_size_column: str = None,
    ) -> ChartGrid:
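        """Plot the differences for several ``(level_1, level_2)`` pairs
        with their confidence intervals."""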
        difference_df = self.differences(
            levels, absolute, groupby, non_inferiority_margins, final_expected_sample_size_column
        )
        chartgrid = self._confidence_grapher.plot_differences(
            difference_df, absolute, groupby, non_inferiority_margins, use_adjusted_intervals
        )
        return chartgrid

    def multiple_difference_plot(
        self,
        level: Union[str, Tuple],
        absolute: bool = True,
        groupby: Union[str, Iterable] = None,
        level_as_reference: bool = False,
        non_inferiority_margins: NIM_TYPE = None,
        use_adjusted_intervals: bool = False,
        final_expected_sample_size_column: str = None,
    ) -> ChartGrid:
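        """Plot the differences between ``level`` and every other level with
        their confidence intervals."""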
        difference_df = self.multiple_difference(
            level,
            absolute,
            groupby,
            level_as_reference,
            non_inferiority_margins,
            final_expected_sample_size_column,
        )
        chartgrid = self._confidence_grapher.plot_multiple_difference(
            difference_df, absolute, groupby, level_as_reference, non_inferiority_margins, use_adjusted_intervals
        )
        return chartgrid

    def sample_ratio_test(self, expected_proportions: Dict) -> Tuple[float, DataFrame]:
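        """Test whether the observed group sizes in the denominator column
        match ``expected_proportions`` (a sample ratio mismatch check).
        Returns the test's p-value and a DataFrame with per-group details."""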
        return sample_ratio_test(
            self._df,
            all_group_columns=self._all_group_columns,
            denominator=self._denominator,
            expected_proportions=expected_proportions,
        )

    def achieved_power(self, level_1, level_2, mde, alpha, groupby=None) -> DataFrame:
        """Calculate the achieved power of the test of the difference between
        level 1 and level 2, given a target minimal detectable effect.

        Args:
            level_1 (str, tuple of str): Name of first level.
            level_2 (str, tuple of str): Name of second level.
            mde (float): Absolute minimal detectable effect size.
            alpha (float): Type I error rate, cutoff value for determining
                statistical significance.
            groupby (str): Name of column. If specified, the achieved power
                is returned for each level of the grouped dimension.

        Returns:
            Pandas DataFrame with the following columns:
            - level_1: Name of level 1.
            - level_2: Name of level 2.
            - power: 1 - beta, where beta is the probability of a Type II
              (false negative) error.

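        Example (illustrative level names and targets, not taken from this
        module)::

            experiment.achieved_power(
                level_1="control", level_2="treatment", mde=0.01, alpha=0.05
            )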
        """
        return self._confidence_computer.achieved_power(level_1, level_2, mde, alpha, groupby)

    def _validate_sequential(self, final_expected_sample_size: str, groupby: Union[str, Iterable]):
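        """Raise a ValueError if sequential testing is requested without the
        ordinal group column being part of ``groupby``."""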
        if final_expected_sample_size is not None:
            if self._ordinal_group_column not in listify(groupby):
                raise ValueError(
                    f"{self._ordinal_group_column} must be in groupby argument to use "
                    f"sequential testing with final_expected_sample_size"
                )