Coverage for /Users/mschultzberg/PycharmProjects/confidence/spotify_confidence/analysis/confidence_utils.py : 63%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
# Copyright 2017-2020 Spotify AB # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License.
INCREASE_PREFFERED, DECREASE_PREFFERED, TWO_SIDED, NIM_TYPE, NIM_INPUT_COLUMN_NAME, PREFERRED_DIRECTION_INPUT_NAME, NIM, NULL_HYPOTHESIS, PREFERENCE,MDE,ALTERNATIVE_HYPOTHESIS, SFX1, SFX2, POINT_ESTIMATE,MDE_INPUT_COLUMN_NAME, BONFERRONI_DO_NOT_COUNT_NON_INFERIORITY)
additional_column: str) -> Iterable:
additional_column: str) -> Iterable: del od[additional_column]
categorical_group_columns: Union[str, Iterable]) -> Iterable: else: raise TypeError("""categorical_group_columns must be string or iterable (list of columns) and you must provide at least one""")
some_groups: Iterable) -> Iterable: else: group for group in all_groups if group not in some_groups and group is not None ]
metric_column: Union[str, None], treatment_column: Union[str, None]) -> Iterable:
level_columns: Union[str, Iterable], levels: Iterable): except (KeyError, ValueError): raise ValueError(""" Invalid level: '{}' Must supply a level within the ungrouped dimensions: {} Valid levels: {} """.format( level, level_columns, list(df.groupby(level_columns).groups.keys())))
mde_value = None if (type(mde[0]) is float and np.isnan(mde[0])) else mde[0] if mde[1] is None or (type(mde[1]) is float and np.isnan(mde[1])): return (mde[0], mde_value, TWO_SIDED) elif mde[1].lower() == INCREASE_PREFFERED: return (mde[0], -mde_value, 'larger') elif mde[1].lower() == DECREASE_PREFFERED: return (mde[0], mde_value, 'smaller') return ( df.assign(**{MDE: lambda df: df[MDE_INPUT_COLUMN_NAME]}) .assign(**{ALTERNATIVE_HYPOTHESIS: lambda df: df.apply( lambda row: row[POINT_ESTIMATE] * _mde_2_signed_mde((row[MDE], (row[PREFERRED_DIRECTION_INPUT_NAME] if PREFERRED_DIRECTION_INPUT_NAME in row else np.nan) ))[1], axis=1)}) .assign(**{PREFERENCE: lambda df: df.apply(lambda row: _mde_2_signed_mde( (row[MDE], (row[PREFERRED_DIRECTION_INPUT_NAME] if PREFERRED_DIRECTION_INPUT_NAME in row else np.nan) ))[2], axis=1)}) .assign(**{NULL_HYPOTHESIS: 0})) else:
return (nim[0], nim_value, TWO_SIDED) elif nim[1].lower() == DECREASE_PREFFERED: return (nim[0], nim_value, 'smaller') else: raise ValueError(f'{nim[1].lower()} not in ' f'{[INCREASE_PREFFERED, DECREASE_PREFFERED]}')
df.assign(**{NIM: None}) .assign(**{NULL_HYPOTHESIS: 0}) .assign(**{PREFERENCE: TWO_SIDED}) ) df.assign(**{NIM: _nim_2_signed_nim((nims[0], nims[1]))[0]}) .assign(**{NULL_HYPOTHESIS: df[POINT_ESTIMATE] * _nim_2_signed_nim((nims[0], nims[1]))[1]}) .assign(**{PREFERENCE: _nim_2_signed_nim((nims[0], nims[1]))[2]}) .assign(**{ALTERNATIVE_HYPOTHESIS: 0}) ) elif type(nims) is dict: sgnd_nims = {group: _nim_2_signed_nim(nim) for group, nim in nims.items()} nim_df = ( DataFrame(index=df.index, columns=[NIM, NULL_HYPOTHESIS, PREFERENCE], data=list(df.index.to_series().map(sgnd_nims))) ) return ( df.assign(**{NIM: nim_df[NIM]}) .assign(**{NULL_HYPOTHESIS: df[POINT_ESTIMATE] * nim_df[NULL_HYPOTHESIS]}) .assign(**{PREFERENCE: nim_df[PREFERENCE]}) .assign(**{ALTERNATIVE_HYPOTHESIS: 0}) ) elif type(nims) is bool: return ( df.assign(**{NIM: lambda df: df[NIM_INPUT_COLUMN_NAME]}) .assign(**{NULL_HYPOTHESIS: lambda df: df.apply( lambda row: row[POINT_ESTIMATE] * _nim_2_signed_nim((row[NIM], row[PREFERRED_DIRECTION_INPUT_NAME]))[1], axis=1)}) .assign(**{PREFERENCE: lambda df: df.apply(lambda row: _nim_2_signed_nim( (row[NIM], row[PREFERRED_DIRECTION_INPUT_NAME]))[2], axis=1)}) .assign(**{ALTERNATIVE_HYPOTHESIS: 0}) ) else: raise ValueError(f'non_inferiority_margins must be None, tuple, dict,' f'or DataFrame, but is {type(nims)}.')
or (type(x) is float and type(y) is float and np.isnan(x) and np.isnan(y)) else False
df.apply(lambda row: equals_none_or_nan(row[PREFERENCE + SFX1], row[PREFERENCE + SFX2]), axis=1).all()): df.rename(columns={NIM + SFX1: NIM, NULL_HYPOTHESIS + SFX1: NULL_HYPOTHESIS, PREFERENCE + SFX1: PREFERENCE}) .drop(columns=[NIM + SFX2, NULL_HYPOTHESIS + SFX2, PREFERENCE + SFX2]) )
raise ValueError("Non-inferiority margins do not agree across levels")
df.rename(columns={column + SFX1: column}) .drop(columns=[column + SFX2]) )
raise ValueError(f"Values of {column} do not agree across levels")
level_columns: Union[str, Iterable], level_1: Union[str, Tuple], level_2: Union[str, Tuple]) -> DataFrame: gdf = df.groupby(level_columns) return concat([gdf.get_group(level_1), gdf.get_group(level_2)])
else:
columns_that_must_exist, group_columns: Iterable, ordinal_group_column: str): """Integrity check input dataframe. """ _validate_column(df, col)
raise ValueError("""At least one of `categorical_group_columns` or `ordinal_group_column` must be specified.""" )
# Ensure there's at most 1 observation per grouping. df.groupby(group_columns).size() <= 1) raise ValueError( """Each grouping should have at most 1 observation.""")
ordinal_group_column].dtype.type and not issubclass(ordinal_column_type, np.datetime64): raise TypeError("""`ordinal_group_column` is type `{}`. Must be number or datetime type.""".format(ordinal_column_type))
raise ValueError(f"""Column {col} is not in dataframe""")
finite_numbers = numbers[numbers.abs() != float("inf")] return finite_numbers.min(), finite_numbers.max()
absolute: bool, extra_zeros: int = 0) -> Tuple[str, float, float]: min_value, max_value = _get_finite_bounds(numbers)
if max_value == min_value: return "0.00", min_value, max_value
extra_zeros += 2 if absolute else 0 precision = -int(np.log10(abs(max_value - min_value))) + extra_zeros zeros = ''.join(['0'] * precision) return "0.{}{}".format(zeros, '' if absolute else '%'), min_value, max_value
return s.clip(-100*abs(limit), 100*abs(limit))
return df.assign(color=df[cols].agg(level2str, axis='columns'))
baseline_var: float, alpha: float, n1: int, n2: int) -> float:
z_alpha = norm.ppf(1 - alpha / 2) a = abs(mde) / np.sqrt(baseline_var) b = np.sqrt(n1 * n2 / (n1 + n2)) z_stat = a * b
return norm.cdf(z_stat - z_alpha) + norm.cdf(-z_stat - z_alpha) |