Coverage for /Users/mschultzberg/PycharmProjects/confidence/spotify_confidence/analysis/frequentist/confidence_computers/generic_computer.py : 75%

# Copyright 2017-2020 Spotify AB
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Import targets reconstructed from the covered symbols; module paths assumed
# from the package layout shown in the coverage header.
from typing import Union, Iterable, List, Tuple, Dict

import numpy as np
from pandas import DataFrame, Series, concat
from statsmodels.stats.multitest import multipletests

from spotify_confidence.analysis.abstract_base_classes.confidence_computer_abc import (
    ConfidenceComputerABC)
from spotify_confidence.analysis.confidence_utils import (
    get_remaning_groups, validate_levels, level2str, listify, add_nim_columns,
    validate_and_rename_nims, validate_and_rename_column, add_mde_columns,
    get_all_categorical_group_columns, get_all_group_columns, validate_data,
    remove_group_columns)
from spotify_confidence.analysis.constants import (
    POINT_ESTIMATE, VARIANCE, CI_LOWER, CI_UPPER, DIFFERENCE, P_VALUE,
    SFX1, SFX2, STD_ERR, ALPHA, ADJUSTED_ALPHA, POWER, POWERED_EFFECT,
    ADJUSTED_POWER, ADJUSTED_P, ADJUSTED_LOWER, ADJUSTED_UPPER,
    IS_SIGNIFICANT, REQUIRED_SAMPLE_SIZE, NULL_HYPOTHESIS, NIM, PREFERENCE,
    PREFERENCE_TEST, TWO_SIDED, PREFERENCE_DICT,
    BONFERRONI, HOLM, HOMMEL, SIMES_HOCHBERG, SIDAK, HOLM_SIDAK,
    FDR_BH, FDR_BY, FDR_TSBH, FDR_TSBKY,
    SPOT_1_HOLM, SPOT_1_HOMMEL, SPOT_1_SIMES_HOCHBERG, SPOT_1_SIDAK,
    SPOT_1_HOLM_SIDAK, SPOT_1_FDR_BH, SPOT_1_FDR_BY, SPOT_1_FDR_TSBH,
    SPOT_1_FDR_TSBKY, BONFERRONI_ONLY_COUNT_TWOSIDED,
    BONFERRONI_DO_NOT_COUNT_NON_INFERIORITY, SPOT_1, CORRECTION_METHODS,
    BOOTSTRAP, CHI2, TTEST, ZTEST, NIM_TYPE,
    CORRECTION_METHODS_THAT_REQUIRE_METRIC_INFO)
from spotify_confidence.analysis.frequentist.confidence_computers.bootstrap_computer import BootstrapComputer
from spotify_confidence.analysis.frequentist.confidence_computers.chi_squared_computer import ChiSquaredComputer
from spotify_confidence.analysis.frequentist.confidence_computers.t_test_computer import TTestComputer
from spotify_confidence.analysis.frequentist.confidence_computers.z_test_computer import ZTestComputer
from spotify_confidence.analysis.frequentist.sequential_bound_solver import bounds
def sequential_bounds(t: np.ndarray, alpha: float, sides: int):
    # enclosing signature reconstructed; helper name assumed
    return bounds(t, alpha, rho=2, ztrun=8, sides=sides, max_nints=1000)
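
# The call above solves group-sequential z-bounds under a rho=2 power-family
# spending function. As a minimal sketch of the spending idea (illustrative
# arithmetic only, not the internal `bounds` solver): spending alpha * t**rho
# means the type-I budget available grows with the information fraction t:
#
#     >>> alpha, rho = 0.05, 2
#     >>> [round(alpha * t ** rho, 4) for t in (0.25, 0.5, 0.75, 1.0)]
#     [0.0031, 0.0125, 0.0281, 0.05]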
class GenericComputer(ConfidenceComputerABC):
    # class name assumed from the module name; the leading parameters of
    # __init__ are reconstructed from their use below

    def __init__(self,
                 data_frame: DataFrame,
                 numerator_column: str,
                 numerator_sum_squares_column: str,
                 denominator_column: str,
                 categorical_group_columns: Union[str, Iterable],
                 ordinal_group_column: str,
                 interval_size: float,
                 correction_method: str,
                 method_column: str,
                 bootstrap_samples_column: str,
                 metric_column: Union[str, None],
                 treatment_column: Union[str, None],
                 power: float):
        self._df = data_frame
        self._numerator = numerator_column
        self._numerator_sumsq = numerator_sum_squares_column
        self._denominator = denominator_column
        self._ordinal_group_column = ordinal_group_column
        self._interval_size = interval_size
        self._method_column = method_column
        self._bootstrap_samples_column = bootstrap_samples_column
        self._metric_column = metric_column
        self._treatment_column = treatment_column
        self._power = power
        if self._numerator_sumsq is None or self._numerator_sumsq == self._numerator:
            # guard reconstructed from the error message below
            if (data_frame[numerator_column] <=
                    data_frame[denominator_column]).all():
                # Treat as binomial data
                self._numerator_sumsq = self._numerator
            else:
                raise ValueError(
                    f'numerator_sum_squares_column missing or same as '
                    f'numerator_column, but since {numerator_column} is not '
                    f'always smaller than {denominator_column} it can\'t be '
                    f'binomial data. Please check your data.')
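
        # Why the binomial fallback is sound (a sketch): for 0/1 outcomes x,
        # x**2 == x, so the sum of squares equals the sum and the numerator
        # column can stand in for the missing sum-of-squares column:
        #
        #     >>> import numpy as np
        #     >>> x = np.random.default_rng(1).integers(0, 2, size=1000)
        #     >>> bool((x ** 2).sum() == x.sum())
        #     True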
        self._categorical_group_columns = get_all_categorical_group_columns(
            categorical_group_columns, metric_column, treatment_column)
        self._correction_method = correction_method
        if self._correction_method not in CORRECTION_METHODS:
            raise ValueError(f'Use one of the correction methods '
                             f'in {CORRECTION_METHODS}')
        if data_frame.groupby(self._metric_column).ngroups == 1:
            self._single_metric = True  # assumed counterpart of the else branch
            self._categorical_group_columns = remove_group_columns(
                self._categorical_group_columns, self._metric_column)
        else:
            self._single_metric = False
        self._all_group_columns = get_all_group_columns(
            self._categorical_group_columns, self._ordinal_group_column)
        columns_that_must_exist = []
        # membership checked against the column values, not the index
        if (CHI2 in self._df[self._method_column].values
                or TTEST in self._df[self._method_column].values
                or ZTEST in self._df[self._method_column].values):
            columns_that_must_exist += [self._numerator, self._denominator]
            columns_that_must_exist += ([] if self._numerator_sumsq is None
                                        else [self._numerator_sumsq])
        if BOOTSTRAP in self._df[self._method_column].values:
            columns_that_must_exist += [self._bootstrap_samples_column]
        # assumed: the assembled column list is validated with the imported helper
        validate_data(self._df, columns_that_must_exist,
                      self._all_group_columns, self._ordinal_group_column)
    @property
    def _confidence_computers(self) -> Dict:
        # reconstructed as a property over the per-method computers,
        # keyed by the value in the method column
        return {
            CHI2: ChiSquaredComputer(self._numerator, self._numerator_sumsq,
                                     self._denominator,
                                     self._ordinal_group_column,
                                     self._interval_size),
            TTEST: TTestComputer(self._numerator, self._numerator_sumsq,
                                 self._denominator,
                                 self._ordinal_group_column,
                                 self._interval_size),
            ZTEST: ZTestComputer(self._numerator, self._numerator_sumsq,
                                 self._denominator,
                                 self._ordinal_group_column,
                                 self._interval_size),
            BOOTSTRAP: BootstrapComputer(self._bootstrap_samples_column,
                                         self._interval_size),
        }
    def compute_summary(self, verbose: bool) -> DataFrame:
        # method name follows the ConfidenceComputerABC interface
        return (
            self._sufficient_statistics
            if verbose
            else self._sufficient_statistics[
                self._all_group_columns
                + [c for c in [self._numerator, self._denominator] if c is not None]
                + [POINT_ESTIMATE, CI_LOWER, CI_UPPER]
            ]
        )
    @property
    def _sufficient_statistics(self) -> DataFrame:
        return (
            self._df
            .assign(**{POINT_ESTIMATE: self._point_estimate})
            .assign(**{VARIANCE: self._variance})
            .pipe(self._add_point_estimate_ci)
        )
    def compute_difference(self,
                           level_1: Union[str, Iterable],
                           level_2: Union[str, Iterable],
                           absolute: bool,
                           groupby: Union[str, Iterable],
                           nims: NIM_TYPE,
                           mdes: bool,
                           final_expected_sample_size_column: str,
                           verbose: bool) -> DataFrame:
        level_columns = get_remaning_groups(self._all_group_columns, groupby)
        difference_df = self._compute_differences(
            level_columns,
            [(level_1, level_2)],
            absolute,
            groupby,
            level_as_reference=True,
            nims=nims,
            mdes=mdes,
            final_expected_sample_size_column=final_expected_sample_size_column)
        return (difference_df
                if verbose
                else difference_df[
                    listify(groupby)
                    + ['level_1', 'level_2', 'absolute_difference',
                       DIFFERENCE, CI_LOWER, CI_UPPER, P_VALUE]
                    + [ADJUSTED_LOWER, ADJUSTED_UPPER, ADJUSTED_P,
                       IS_SIGNIFICANT, POWERED_EFFECT, REQUIRED_SAMPLE_SIZE]
                    + ([NIM, NULL_HYPOTHESIS, PREFERENCE]
                       if nims is not None else [])])
    def compute_multiple_difference(self,
                                    level: Union[str, Iterable],
                                    absolute: bool,
                                    groupby: Union[str, Iterable],
                                    level_as_reference: bool,
                                    nims: NIM_TYPE,
                                    minimum_detectable_effect: bool,
                                    final_expected_sample_size_column: str,
                                    verbose: bool) -> DataFrame:
        # method name and the glue around the covered fragments reconstructed
        level_columns = get_remaning_groups(self._all_group_columns, groupby)
        levels = [(level, other)
                  for other in self._sufficient_statistics
                  .groupby(level_columns).groups.keys() if other != level]
        difference_df = self._compute_differences(
            level_columns, levels, absolute, groupby, level_as_reference,
            nims, minimum_detectable_effect, final_expected_sample_size_column)
        return (difference_df
                if verbose
                else difference_df[
                    listify(groupby)
                    + ['level_1', 'level_2', 'absolute_difference',
                       DIFFERENCE, CI_LOWER, CI_UPPER, P_VALUE,
                       POWERED_EFFECT, REQUIRED_SAMPLE_SIZE]
                    + [ADJUSTED_LOWER, ADJUSTED_UPPER, ADJUSTED_P, IS_SIGNIFICANT]
                    + ([NIM, NULL_HYPOTHESIS, PREFERENCE]
                       if nims is not None else [])])
    def compute_differences(self,
                            levels: List[Tuple],
                            absolute: bool,
                            groupby: Union[str, Iterable],
                            nims: NIM_TYPE,
                            final_expected_sample_size_column: str,
                            verbose: bool) -> DataFrame:
        level_columns = get_remaning_groups(self._all_group_columns, groupby)
        difference_df = self._compute_differences(
            level_columns,
            [levels] if isinstance(levels, tuple) else levels,
            absolute,
            groupby,
            level_as_reference=True,
            nims=nims,
            mdes=False,  # assumed default; the covered call does not set it
            final_expected_sample_size_column=final_expected_sample_size_column)
        return (difference_df
                if verbose
                else difference_df[
                    listify(groupby)
                    + ['level_1', 'level_2', 'absolute_difference',
                       DIFFERENCE, CI_LOWER, CI_UPPER, P_VALUE]
                    + [ADJUSTED_LOWER, ADJUSTED_UPPER, ADJUSTED_P,
                       IS_SIGNIFICANT, POWERED_EFFECT, REQUIRED_SAMPLE_SIZE]
                    + ([NIM, NULL_HYPOTHESIS, PREFERENCE]
                       if nims is not None else [])])
    def _compute_differences(self,
                             level_columns: Iterable,
                             levels: Union[str, Iterable],
                             absolute: bool,
                             groupby: Union[str, Iterable],
                             level_as_reference: bool,
                             nims: NIM_TYPE,
                             mdes: bool,
                             final_expected_sample_size_column: str):
        if not isinstance(level_as_reference, bool):
            raise ValueError(
                f'level_as_reference must be either True or False, '
                f'but is {level_as_reference}.')
        groupby = listify(groupby)
        # unique_levels/str2level glue reconstructed around the covered fragments
        unique_levels = set([lvl[0] for lvl in levels]
                            + [lvl[1] for lvl in levels])
        validate_levels(self._sufficient_statistics,
                        level_columns, unique_levels)
        str2level = {level2str(lvl): lvl for lvl in unique_levels}
        filtered_sufficient_statistics = concat(
            [self._sufficient_statistics.groupby(level_columns).get_group(group)
             for group in unique_levels])
        levels = [(level2str(lvl[0]), level2str(lvl[1]))
                  if level_as_reference
                  else (level2str(lvl[1]), level2str(lvl[0])) for lvl in levels]
        return (
            self._sufficient_statistics
            .assign(level=self._sufficient_statistics[level_columns]
                    .agg(level2str, axis='columns'))
            .pipe(lambda df: df if groupby == [] else df.set_index(groupby))
            .pipe(self._create_comparison_df,
                  groups_to_compare=levels,
                  absolute=absolute,
                  nims=nims,
                  mdes=mdes,
                  final_expected_sample_size_column=final_expected_sample_size_column,
                  filtered_sufficient_statistics=filtered_sufficient_statistics)
            .assign(level_1=lambda df: df['level_1'].map(lambda s: str2level[s]))
            .assign(level_2=lambda df: df['level_2'].map(lambda s: str2level[s]))
            .reset_index()
            .sort_values(by=groupby + ['level_1', 'level_2'])
        )
    def _create_comparison_df(self,
                              df: DataFrame,
                              groups_to_compare: List[Tuple[str, str]],
                              absolute: bool,
                              nims: NIM_TYPE,
                              mdes: bool,
                              final_expected_sample_size_column: str,
                              filtered_sufficient_statistics: DataFrame
                              ) -> DataFrame:
        def join(df: DataFrame) -> DataFrame:
            # inner helper reconstructed around the covered branches
            has_index = not all(idx is None for idx in df.index.names)
            if has_index:
                # self-join on index (the index will typically model the date,
                # i.e., rows with the same date are joined)
                return df.merge(df, left_index=True, right_index=True,
                                suffixes=(SFX1, SFX2))
            else:
                # join on dummy column, i.e. conduct a cross join
                return (
                    df.assign(dummy_join_column=1)
                    .merge(right=df.assign(dummy_join_column=1),
                           on='dummy_join_column',
                           suffixes=(SFX1, SFX2))
                    .drop(columns='dummy_join_column')
                )
        return (
            df.pipe(add_nim_columns, nims=nims)
            .pipe(add_mde_columns, mdes=mdes)
            .pipe(join)
            .query(f'level_1 in {[l1 for l1, l2 in groups_to_compare]} and '
                   f'level_2 in {[l2 for l1, l2 in groups_to_compare]} and '
                   f'level_1 != level_2')
            .pipe(validate_and_rename_nims)
            .pipe(validate_and_rename_column, final_expected_sample_size_column)
            .pipe(validate_and_rename_column, self._method_column)
            .assign(**{DIFFERENCE: lambda df: df[POINT_ESTIMATE + SFX2]
                       - df[POINT_ESTIMATE + SFX1]})
            .assign(**{STD_ERR: self._std_err})
            .pipe(self._add_p_value_and_ci,
                  final_expected_sample_size_column=final_expected_sample_size_column,
                  filtered_sufficient_statistics=filtered_sufficient_statistics)
            .pipe(self._adjust_if_absolute, absolute=absolute)
            .pipe(self._add_adjusted_power)
            .apply(self._powered_effect_and_required_sample_size, axis=1)
            .assign(**{PREFERENCE: lambda df: df[PREFERENCE].map(PREFERENCE_DICT)}))
    @staticmethod
    def _adjust_if_absolute(df: DataFrame, absolute: bool) -> DataFrame:
        # method name grounded by the pipe above; the absolute branch is assumed
        if absolute:
            return df.assign(absolute_difference=absolute)
        else:
            return (
                df.assign(absolute_difference=absolute)
                .assign(**{DIFFERENCE: df[DIFFERENCE] / df[POINT_ESTIMATE + SFX1]})
                .assign(**{CI_LOWER: df[CI_LOWER] / df[POINT_ESTIMATE + SFX1]})
                .assign(**{CI_UPPER: df[CI_UPPER] / df[POINT_ESTIMATE + SFX1]})
                .assign(**{ADJUSTED_LOWER: df[ADJUSTED_LOWER] / df[POINT_ESTIMATE + SFX1]})
                .assign(**{ADJUSTED_UPPER: df[ADJUSTED_UPPER] / df[POINT_ESTIMATE + SFX1]})
                .assign(**{NULL_HYPOTHESIS: df[NULL_HYPOTHESIS] / df[POINT_ESTIMATE + SFX1]})
            )
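
    # Example of the relative adjustment above (toy numbers): a +0.1 absolute
    # lift over a control point estimate of 0.4 is reported as a 25% relative
    # difference when absolute=False, since every column is scaled by the
    # reference point estimate:
    #
    #     >>> 0.1 / 0.4
    #     0.25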
    def _std_err(self, df: DataFrame) -> Series:
        return np.sqrt(df[VARIANCE + SFX1] / df[self._denominator + SFX1]
                       + df[VARIANCE + SFX2] / df[self._denominator + SFX2])
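
    # The standard error above is the unpooled two-sample formula
    # sqrt(var_1 / n_1 + var_2 / n_2), e.g. with toy numbers:
    #
    #     >>> import numpy as np
    #     >>> round(float(np.sqrt(0.16 / 1000 + 0.25 / 1000)), 4)
    #     0.0202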
    @staticmethod
    def _corrections_power(number_of_success_metrics: int,
                           number_of_guardrail_metrics: int) -> int:
        return (number_of_guardrail_metrics
                if number_of_success_metrics == 0
                else number_of_guardrail_metrics + 1)
    def _add_adjusted_power(self, df: DataFrame) -> DataFrame:
        # outer guard reconstructed: only some correction methods use metric info
        if self._correction_method in CORRECTION_METHODS_THAT_REQUIRE_METRIC_INFO:
            if self._metric_column is None or self._treatment_column is None:
                return df.assign(**{ADJUSTED_POWER: None})
            else:
                self._number_total_metrics = (
                    1 if self._single_metric
                    else df.groupby(self._metric_column).ngroups)
                if self._single_metric:
                    if df[df[NIM].isnull()].shape[0] > 0:
                        self._number_success_metrics = 1
                    else:
                        self._number_success_metrics = 0
                else:
                    self._number_success_metrics = (
                        df[df[NIM].isnull()].groupby(self._metric_column).ngroups)
                self._number_guardrail_metrics = (
                    self._number_total_metrics - self._number_success_metrics)
                power_correction = self._corrections_power(
                    number_of_guardrail_metrics=self._number_guardrail_metrics,
                    number_of_success_metrics=self._number_success_metrics)
                return df.assign(
                    **{ADJUSTED_POWER: 1 - (1 - df[POWER]) / power_correction})
        else:
            # assumed: power is left unadjusted for the remaining methods
            return df.assign(**{ADJUSTED_POWER: df[POWER]})
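
    # The power adjustment above splits the type II error budget
    # beta = 1 - power across the corrected tests: with a planned power of 0.8
    # and a correction factor of 2, each test is powered at 1 - 0.2 / 2 instead,
    # so the chance of missing a true effect anywhere in the family stays near
    # the original 20%:
    #
    #     >>> 1 - (1 - 0.8) / 2
    #     0.9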
    def _add_p_value_and_ci(self,
                            df: DataFrame,
                            final_expected_sample_size_column: str,
                            filtered_sufficient_statistics: DataFrame) -> DataFrame:
        def set_alpha_and_adjust_preference(df: DataFrame) -> DataFrame:
            alpha_0 = 1 - self._interval_size  # assumed baseline alpha
            return (
                df.assign(**{ALPHA: df.apply(
                    lambda row: 2 * alpha_0
                    if self._correction_method == SPOT_1
                    and row[PREFERENCE] != TWO_SIDED
                    else alpha_0,
                    axis=1)})
                .assign(**{POWER: self._power})
                .assign(**{PREFERENCE_TEST: df.apply(
                    lambda row: TWO_SIDED
                    if self._correction_method == SPOT_1
                    else row[PREFERENCE],
                    axis=1)})
            )
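
        # Doubling alpha_0 for one-sided preferences under SPOT-1 keeps the
        # decisions equivalent while testing two-sided: a one-sided z-test at
        # level a and a two-sided test at level 2a share the same critical
        # value (a sketch, using scipy directly):
        #
        #     >>> from scipy import stats
        #     >>> round(float(stats.norm.ppf(1 - 0.05)), 3)       # one-sided, alpha
        #     1.645
        #     >>> round(float(stats.norm.ppf(1 - 0.10 / 2)), 3)   # two-sided, 2 * alpha
        #     1.645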
        def _add_adjusted_p_and_is_significant(df: DataFrame) -> DataFrame:
            # inner-function name and branch scaffolding reconstructed
            if final_expected_sample_size_column is not None:
                if self._correction_method not in [
                        BONFERRONI, BONFERRONI_ONLY_COUNT_TWOSIDED,
                        BONFERRONI_DO_NOT_COUNT_NON_INFERIORITY, SPOT_1]:
                    raise ValueError(
                        f"{self._correction_method} not supported for sequential tests. "
                        f"Use one of {BONFERRONI}, {BONFERRONI_ONLY_COUNT_TWOSIDED}, "
                        f"{BONFERRONI_DO_NOT_COUNT_NON_INFERIORITY}, {SPOT_1}")
                groups_except_ordinal = [
                    column for column in df.index.names
                    if column != self._ordinal_group_column]
                n_comparisons = self._get_num_comparisons(
                    df, self._correction_method,
                    ['level_1', 'level_2'] + groups_except_ordinal)
                df[ADJUSTED_ALPHA] = self._compute_sequential_adjusted_alpha(
                    df, final_expected_sample_size_column,
                    filtered_sufficient_statistics, n_comparisons)
                df[IS_SIGNIFICANT] = df[P_VALUE] < df[ADJUSTED_ALPHA]
                # sequential decisions are made on the alpha bound alone
                df[P_VALUE] = None
                df[ADJUSTED_P] = None
            elif self._correction_method in [
                    HOLM, HOMMEL, SIMES_HOCHBERG, SIDAK, HOLM_SIDAK,
                    FDR_BH, FDR_BY, FDR_TSBH, FDR_TSBKY,
                    SPOT_1_HOLM, SPOT_1_HOMMEL, SPOT_1_SIMES_HOCHBERG,
                    SPOT_1_SIDAK, SPOT_1_HOLM_SIDAK, SPOT_1_FDR_BH,
                    SPOT_1_FDR_BY, SPOT_1_FDR_TSBH, SPOT_1_FDR_TSBKY]:
                correction_method = (
                    self._correction_method[len(SPOT_1) + 1:]
                    if self._correction_method.startswith(SPOT_1)
                    else self._correction_method)  # assumed: statsmodels needs the base name
                is_significant, adjusted_p, _, _ = multipletests(
                    pvals=df[P_VALUE],
                    alpha=1 - self._interval_size,
                    method=correction_method)
                df[ADJUSTED_P] = adjusted_p
                df[IS_SIGNIFICANT] = is_significant
            elif self._correction_method in [
                    BONFERRONI, BONFERRONI_ONLY_COUNT_TWOSIDED,
                    BONFERRONI_DO_NOT_COUNT_NON_INFERIORITY, SPOT_1]:
                groupby = ['level_1', 'level_2'] + [
                    column for column in df.index.names if column is not None]
                n_comparisons = self._get_num_comparisons(
                    df, self._correction_method, groupby)
                df[ADJUSTED_P] = df.apply(
                    lambda row: min(row[P_VALUE] * n_comparisons
                                    * (1 + (row[PREFERENCE_TEST] == 'two-sided')), 1),
                    axis=1)
                # df[ADJUSTED_P] = df[P_VALUE].map(lambda p: min(p * n_comparisons, 1))
                df[IS_SIGNIFICANT] = df[ADJUSTED_P] < df[ALPHA]  # assumed decision rule
            else:
                raise ValueError("Can't figure out which correction method to use :(")
            return df
        def _add_ci(df: DataFrame) -> DataFrame:
            # inner-function name and the ci/adjusted_ci glue reconstructed;
            # self._ci is the assumed row-wise interval helper
            ci = df.apply(self._ci, axis=1, alpha_column=ALPHA)
            ci_df = DataFrame(index=ci.index,
                              columns=[CI_LOWER, CI_UPPER],
                              data=list(ci.values))

            if (self._correction_method in [
                    HOLM, HOMMEL, SIMES_HOCHBERG,
                    SPOT_1_HOLM, SPOT_1_HOMMEL, SPOT_1_SIMES_HOCHBERG]
                    and all(df[PREFERENCE_TEST] != TWO_SIDED)):
                adjusted_ci = self._ci_for_multiple_comparison_methods(
                    df,
                    correction_method=self._correction_method,
                    alpha=1 - self._interval_size,
                )
            elif self._correction_method in [
                    BONFERRONI, BONFERRONI_ONLY_COUNT_TWOSIDED,
                    BONFERRONI_DO_NOT_COUNT_NON_INFERIORITY, SPOT_1,
                    SPOT_1_HOLM, SPOT_1_HOMMEL, SPOT_1_SIMES_HOCHBERG,
                    SPOT_1_SIDAK, SPOT_1_HOLM_SIDAK, SPOT_1_FDR_BH,
                    SPOT_1_FDR_BY, SPOT_1_FDR_TSBH, SPOT_1_FDR_TSBKY]:
                # assumed: reuse the row-wise helper with the adjusted alpha
                adjusted_ci = df.apply(self._ci, axis=1,
                                       alpha_column=ADJUSTED_ALPHA)
            else:
                adjusted_ci = ci  # assumed fallback: no extra adjustment available

            adjusted_ci_df = DataFrame(index=adjusted_ci.index,
                                       columns=[ADJUSTED_LOWER, ADJUSTED_UPPER],
                                       data=list(adjusted_ci.values))

            return (
                df.assign(**{CI_LOWER: ci_df[CI_LOWER]})
                .assign(**{CI_UPPER: ci_df[CI_UPPER]})
                .assign(**{ADJUSTED_LOWER: adjusted_ci_df[ADJUSTED_LOWER]})
                .assign(**{ADJUSTED_UPPER: adjusted_ci_df[ADJUSTED_UPPER]})
            )
        return (
            df.pipe(set_alpha_and_adjust_preference)
            .assign(**{P_VALUE: lambda df: df.apply(self._p_value, axis=1)})
            .pipe(_add_adjusted_p_and_is_significant)
            .pipe(_add_ci)
        )
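
    # How the statsmodels call in _add_adjusted_p_and_is_significant behaves
    # (a sketch with toy p-values): Holm steps through the sorted p-values and
    # rejects until the first non-significant one:
    #
    #     >>> from statsmodels.stats.multitest import multipletests
    #     >>> reject, p_adj, _, _ = multipletests(
    #     ...     pvals=[0.01, 0.04, 0.03], alpha=0.05, method='holm')
    #     >>> [bool(r) for r in reject]
    #     [True, False, False]
    #     >>> [round(float(p), 3) for p in p_adj]
    #     [0.03, 0.06, 0.06]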
    def _get_num_comparisons(self, df: DataFrame, correction_method: str,
                             groupby: Iterable) -> int:
        # branch scaffolding reconstructed; bodies marked "assumed" were not
        # covered in this listing
        if correction_method in [BONFERRONI, BONFERRONI_ONLY_COUNT_TWOSIDED]:
            return max(1, df.groupby(groupby).ngroups)  # assumed: count all comparisons
        elif correction_method in [HOLM, HOMMEL, SIMES_HOCHBERG,
                                   SIDAK, HOLM_SIDAK, FDR_BH, FDR_BY,
                                   FDR_TSBH, FDR_TSBKY]:
            return max(1, df.groupby(groupby).ngroups)  # assumed
        elif correction_method in [
                BONFERRONI_DO_NOT_COUNT_NON_INFERIORITY, SPOT_1,
                SPOT_1_HOLM, SPOT_1_HOMMEL, SPOT_1_SIMES_HOCHBERG,
                SPOT_1_SIDAK, SPOT_1_HOLM_SIDAK, SPOT_1_FDR_BH,
                SPOT_1_FDR_BY, SPOT_1_FDR_TSBH, SPOT_1_FDR_TSBKY]:
            if self._metric_column is None:
                # guard assumed, cf. the parallel logic in _add_adjusted_power
                self._number_success_metrics = 0
            else:
                if self._single_metric:
                    if df[df[NIM].isnull()].shape[0] > 0:
                        self._number_success_metrics = 1
                    else:
                        self._number_success_metrics = 0
                else:
                    self._number_success_metrics = (
                        df[df[NIM].isnull()].groupby(self._metric_column).ngroups)
            number_comparisons = len((df[self._treatment_column + SFX1]
                                      + df[self._treatment_column + SFX2]).unique())
            number_segments = (
                1 if len(self._segments) == 0
                or not all(item in df.index.names for item in self._segments)
                else df.groupby(self._segments).ngroups)
            return max(1, number_comparisons
                       * max(1, self._number_success_metrics)
                       * number_segments)
        else:
            raise ValueError(f"Unsupported correction method: {correction_method}.")
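
    # Worked example of the SPOT-1 counting above (toy numbers): two treatment
    # arms against control (2 comparison pairs), 3 success metrics and 2
    # segments give 2 * max(1, 3) * 2 corrected comparisons:
    #
    #     >>> max(1, 2 * max(1, 3) * 2)
    #     12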
"""Calculated the achieved power of test of differences between level 1 and level 2 given a targeted MDE.
Args: level_1 (str, tuple of str): Name of first level. level_2 (str, tuple of str): Name of second level. mde (float): Absolute minimal detectable effect size. alpha (float): Type I error rate, cutoff value for determining statistical significance. groupby (str): Name of column. If specified, will return the difference for each level of the grouped dimension.
Returns: Pandas DataFrame with the following columns: - level_1: Name of level 1. - level_2: Name of level 2. - power: 1 - B, where B is the likelihood of a Type II (false negative) error.
""" groupby = listify(groupby) level_columns = get_remaning_groups(self._all_group_columns, groupby) return ( self._compute_differences(level_columns, [(level_1, level_2)], True, groupby, level_as_reference=True, nims=None, # TODO: IS this right? final_expected_sample_size_column=None) # TODO: IS this # right? .pipe(lambda df: df if groupby == [] else df.set_index(groupby)) .assign(achieved_power=lambda df: df.apply(self._achieved_power, mde=mde, alpha=alpha, axis=1)) )[['level_1', 'level_2', 'achieved_power']]
    def _achieved_power(self,
                        row: Series, mde: float, alpha: float) -> DataFrame:
        return self._confidence_computers[
            row[self._method_column]]._achieved_power(row, mde, alpha)
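
    # For rows dispatched to the z-test computer, achieved power can be
    # sketched with statsmodels directly (illustrative only; effect_size is
    # the standardized MDE, i.e. mde divided by the pooled standard deviation):
    #
    #     >>> from statsmodels.stats.power import zt_ind_solve_power
    #     >>> round(float(zt_ind_solve_power(effect_size=0.1, nobs1=1000,
    #     ...                                alpha=0.05, alternative='two-sided')), 3)
    #     0.609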
    def _compute_sequential_adjusted_alpha(self,
                                           df: DataFrame,
                                           final_expected_sample_size_column: str,
                                           filtered_sufficient_statistics: DataFrame,
                                           n_comparisons: int) -> Series:
        if all(df[self._method_column] == 'z-test'):
            return self._confidence_computers['z-test']._compute_sequential_adjusted_alpha(
                df, final_expected_sample_size_column,
                filtered_sufficient_statistics, n_comparisons)
        else:
            raise NotImplementedError(
                "Sequential testing is only supported for z-tests")
    def _ci_for_multiple_comparison_methods(
            self,
            df: DataFrame,
            correction_method: str,
            alpha: float,
            w: float = 1.0,
    ) -> Tuple[Union[Series, float], Union[Series, float]]:
        # dispatch guard assumed, mirroring _compute_sequential_adjusted_alpha
        if all(df[self._method_column] == ZTEST):
            return self._confidence_computers[ZTEST]._ci_for_multiple_comparison_methods(
                df, correction_method, alpha, w)
        else:
            raise NotImplementedError(
                f"{self._correction_method} is only supported for ZTests")