# Copyright 2017-2020 Spotify AB
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Union, Iterable, List, Tuple, Dict
from warnings import warn

import numpy as np
from numpy import isnan
from pandas import DataFrame, Series
from scipy import stats as st
from statsmodels.stats.multitest import multipletests

import spotify_confidence.analysis.frequentist.confidence_computers.bootstrap_computer as bootstrap_computer
import spotify_confidence.analysis.frequentist.confidence_computers.chi_squared_computer as chi_squared_computer
import spotify_confidence.analysis.frequentist.confidence_computers.t_test_computer as t_test_computer
import spotify_confidence.analysis.frequentist.confidence_computers.z_test_computer as z_test_computers
import spotify_confidence.analysis.frequentist.confidence_computers.z_test_linreg_computer as z_test_linreg_computer
from spotify_confidence.analysis.abstract_base_classes.confidence_computer_abc import ConfidenceComputerABC
from spotify_confidence.analysis.confidence_utils import (
    get_remaning_groups,
    validate_levels,
    level2str,
    listify,
    validate_and_rename_columns,
    drop_and_rename_columns,
    get_all_categorical_group_columns,
    get_all_group_columns,
    validate_data,
    remove_group_columns,
    groupbyApplyParallel,
    is_non_inferiority,
    reset_named_indices,
)

from spotify_confidence.analysis.constants import (
    NUMERATOR,
    NUMERATOR_SUM_OF_SQUARES,
    DENOMINATOR,
    BOOTSTRAPS,
    INTERVAL_SIZE,
    POINT_ESTIMATE,
    FINAL_EXPECTED_SAMPLE_SIZE,
    ORDINAL_GROUP_COLUMN,
    MDE,
    METHOD,
    CORRECTION_METHOD,
    ABSOLUTE,
    VARIANCE,
    NUMBER_OF_COMPARISONS,
    TREATMENT_WEIGHTS,
    IS_BINARY,
    FEATURE,
    FEATURE_SUMSQ,
    FEATURE_CROSS,
    CI_LOWER,
    CI_UPPER,
    DIFFERENCE,
    P_VALUE,
    SFX1,
    SFX2,
    STD_ERR,
    ALPHA,
    ADJUSTED_ALPHA,
    ADJUSTED_ALPHA_POWER_SAMPLE_SIZE,
    POWER,
    POWERED_EFFECT,
    ADJUSTED_POWER,
    ADJUSTED_P,
    ADJUSTED_LOWER,
    ADJUSTED_UPPER,
    IS_SIGNIFICANT,
    REQUIRED_SAMPLE_SIZE,
    REQUIRED_SAMPLE_SIZE_METRIC,
    OPTIMAL_KAPPA,
    OPTIMAL_WEIGHTS,
    CI_WIDTH,
    NULL_HYPOTHESIS,
    ALTERNATIVE_HYPOTHESIS,
    NIM,
    PREFERENCE,
    PREFERENCE_TEST,
    TWO_SIDED,
    PREFERENCE_DICT,
    BONFERRONI,
    HOLM,
    HOMMEL,
    SIMES_HOCHBERG,
    SIDAK,
    HOLM_SIDAK,
    FDR_BH,
    FDR_BY,
    FDR_TSBH,
    FDR_TSBKY,
    SPOT_1_HOLM,
    SPOT_1_HOMMEL,
    SPOT_1_SIMES_HOCHBERG,
    SPOT_1_SIDAK,
    SPOT_1_HOLM_SIDAK,
    SPOT_1_FDR_BH,
    SPOT_1_FDR_BY,
    SPOT_1_FDR_TSBH,
    SPOT_1_FDR_TSBKY,
    BONFERRONI_ONLY_COUNT_TWOSIDED,
    BONFERRONI_DO_NOT_COUNT_NON_INFERIORITY,
    SPOT_1,
    CORRECTION_METHODS,
    BOOTSTRAP,
    CHI2,
    TTEST,
    ZTEST,
    NIM_TYPE,
    CORRECTION_METHODS_THAT_REQUIRE_METRIC_INFO,
    NIM_COLUMN_DEFAULT,
    PREFERRED_DIRECTION_COLUMN_DEFAULT,
    INCREASE_PREFFERED,
    DECREASE_PREFFERED,
    ZTESTLINREG,
    ORIGINAL_POINT_ESTIMATE,
    ORIGINAL_VARIANCE,
    VARIANCE_REDUCTION,
)
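# Dispatch table from method-name constant to the computer module implementing
# that method's statistics (point estimate, variance, CI, p-value, power).
# GenericComputer looks rows up here by the value in the user-supplied method
# column.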

confidence_computers = {
    CHI2: chi_squared_computer,
    TTEST: t_test_computer,
    ZTEST: z_test_computers,
    BOOTSTRAP: bootstrap_computer,
    ZTESTLINREG: z_test_linreg_computer,
}
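# Minimal construction sketch (hypothetical column names; in typical use the
# spotify_confidence front-end classes build a GenericComputer and fill in
# most of these arguments):
#
#     computer = GenericComputer(
#         data_frame=summary_df,
#         numerator_column="clicks",
#         numerator_sum_squares_column="clicks",  # binomial: sumsq == sum
#         denominator_column="impressions",
#         categorical_group_columns="group",
#         ordinal_group_column=None,
#         interval_size=0.95,
#         correction_method=BONFERRONI,
#         method_column="method",  # e.g. every row set to ZTEST
#         bootstrap_samples_column=None,
#         metric_column=None,
#         treatment_column=None,
#         power=0.8,
#         point_estimate_column=None,
#         var_column=None,
#         is_binary_column=None,
#         feature_column=None,
#         feature_sum_squares_column=None,
#         feature_cross_sum_column=None,
#     )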

class GenericComputer(ConfidenceComputerABC):
    def __init__(
        self,
        data_frame: DataFrame,
        numerator_column: Union[str, None],
        numerator_sum_squares_column: Union[str, None],
        denominator_column: Union[str, None],
        categorical_group_columns: Union[str, Iterable],
        ordinal_group_column: Union[str, None],
        interval_size: float,
        correction_method: str,
        method_column: str,
        bootstrap_samples_column: Union[str, None],
        metric_column: Union[str, None],
        treatment_column: Union[str, None],
        power: float,
        point_estimate_column: str,
        var_column: str,
        is_binary_column: str,
        feature_column: Union[str, None],
        feature_sum_squares_column: Union[str, None],
        feature_cross_sum_column: Union[str, None],
    ):

        self._df = data_frame.reset_index(drop=True)
        self._point_estimate_column = point_estimate_column
        self._var_column = var_column
        self._is_binary = is_binary_column
        self._numerator = numerator_column
        self._numerator_sumsq = numerator_sum_squares_column
        if self._numerator is not None and (self._numerator_sumsq is None or self._numerator_sumsq == self._numerator):
            if (data_frame[numerator_column] <= data_frame[denominator_column]).all():
                # Treat as binomial data: for a 0/1 numerator the sum of
                # squares equals the sum, so the numerator column can stand
                # in for it.
                self._numerator_sumsq = self._numerator
            else:
                raise ValueError(
                    f"numerator_sum_squares_column missing or same as "
                    f"numerator_column, but since {numerator_column} is not "
                    f"always less than or equal to {denominator_column} "
                    f"it can't be binomial data. Please check your data."
                )

        self._denominator = denominator_column
        self._categorical_group_columns = get_all_categorical_group_columns(
            categorical_group_columns, metric_column, treatment_column
        )
        self._segments = remove_group_columns(self._categorical_group_columns, metric_column)
        self._segments = remove_group_columns(self._segments, treatment_column)
        self._ordinal_group_column = ordinal_group_column
        self._metric_column = metric_column
        self._interval_size = interval_size
        self._power = power
        self._treatment_column = treatment_column
        self._feature = feature_column
        self._feature_ssq = feature_sum_squares_column
        self._feature_cross = feature_cross_sum_column

        if correction_method.lower() not in CORRECTION_METHODS:
            raise ValueError(f"Use one of the correction methods in {CORRECTION_METHODS}")
        self._correction_method = correction_method
        self._method_column = method_column

        self._single_metric = False
        if self._metric_column is not None and data_frame.groupby(self._metric_column, sort=False).ngroups == 1:
            self._single_metric = True

        self._all_group_columns = get_all_group_columns(self._categorical_group_columns, self._ordinal_group_column)
        self._bootstrap_samples_column = bootstrap_samples_column

        columns_that_must_exist = []
        # Note: ``in`` on a pandas Series tests the index, not the values,
        # so membership is checked against ``.values`` here.
        if (
            CHI2 in self._df[self._method_column].values
            or TTEST in self._df[self._method_column].values
            or ZTEST in self._df[self._method_column].values
        ):
            if not self._point_estimate_column or not self._var_column:
                columns_that_must_exist += [self._numerator, self._denominator]
                columns_that_must_exist += [] if self._numerator_sumsq is None else [self._numerator_sumsq]
            else:
                columns_that_must_exist += [self._point_estimate_column, self._var_column]
        if BOOTSTRAP in self._df[self._method_column].values:
            columns_that_must_exist += [self._bootstrap_samples_column]
        if ZTESTLINREG in self._df[self._method_column].values:
            columns_that_must_exist += [self._feature, self._feature_ssq, self._feature_cross]

        validate_data(self._df, columns_that_must_exist, self._all_group_columns, self._ordinal_group_column)

        self._sufficient = None

    def compute_summary(self, verbose: bool) -> DataFrame:
        return (
            self._sufficient_statistics
            if verbose
            else self._sufficient_statistics[
                self._all_group_columns
                + ([self._metric_column] if self._metric_column is not None and self._single_metric else [])
                + [c for c in [self._numerator, self._denominator] if c is not None]
                + [POINT_ESTIMATE, CI_LOWER, CI_UPPER]
            ]
        )
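    # Lazily computed per-(method, metric) summary of the input data: a point
    # estimate, variance and CI for each group, delegated to the computer
    # module matching each row's method. Cached in self._sufficient.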

    @property
    def _sufficient_statistics(self) -> DataFrame:
        if self._sufficient is None:
            arg_dict = {
                NUMERATOR: self._numerator,
                NUMERATOR_SUM_OF_SQUARES: self._numerator_sumsq,
                DENOMINATOR: self._denominator,
                BOOTSTRAPS: self._bootstrap_samples_column,
                INTERVAL_SIZE: self._interval_size,
                FEATURE: self._feature,
                FEATURE_SUMSQ: self._feature_ssq,
                FEATURE_CROSS: self._feature_cross,
            }
            groupby = [col for col in [self._method_column, self._metric_column] if col is not None]
            self._sufficient = (
                self._df.groupby(groupby, sort=False)
                .apply(
                    lambda df: df.assign(
                        **{
                            POINT_ESTIMATE: lambda df: df[self._point_estimate_column]
                            if self._point_estimate_column is not None
                            else confidence_computers[df[self._method_column].values[0]].point_estimate(df, arg_dict)
                        }
                    )
                    .assign(
                        **{
                            VARIANCE: lambda df: df[self._var_column]
                            if self._var_column is not None
                            else confidence_computers[df[self._method_column].values[0]].variance(df, arg_dict)
                        }
                    )
                    .pipe(
                        lambda df: df
                        if self._point_estimate_column is not None
                        else confidence_computers[df[self._method_column].values[0]].add_point_estimate_ci(
                            df, arg_dict
                        )
                    )
                )
                .pipe(reset_named_indices)
            )
        return self._sufficient
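    # The three public difference APIs below share _compute_differences; they
    # differ only in how the (reference, treatment) level pairs are formed and
    # in which columns the non-verbose output keeps.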

    def compute_difference(
        self,
        level_1: Union[str, Iterable],
        level_2: Union[str, Iterable],
        absolute: bool,
        groupby: Union[str, Iterable],
        nims: NIM_TYPE,
        final_expected_sample_size_column: str,
        verbose: bool,
        mde_column: str,
    ) -> DataFrame:
        level_columns = get_remaning_groups(self._all_group_columns, groupby)
        difference_df = self._compute_differences(
            level_columns=level_columns,
            levels=[(level_1, level_2)],
            absolute=absolute,
            groupby=groupby,
            level_as_reference=True,
            nims=nims,
            final_expected_sample_size_column=final_expected_sample_size_column,
            mde_column=mde_column,
        )
        return (
            difference_df
            if verbose
            else difference_df[
                listify(groupby)
                + ["level_1", "level_2", "absolute_difference", DIFFERENCE, CI_LOWER, CI_UPPER, P_VALUE]
                + [ADJUSTED_LOWER, ADJUSTED_UPPER, ADJUSTED_P, IS_SIGNIFICANT, POWERED_EFFECT, REQUIRED_SAMPLE_SIZE]
                + ([NIM, NULL_HYPOTHESIS, PREFERENCE] if nims is not None else [])
            ]
        )

    def compute_multiple_difference(
        self,
        level: Union[str, Iterable],
        absolute: bool,
        groupby: Union[str, Iterable],
        level_as_reference: bool,
        nims: NIM_TYPE,
        final_expected_sample_size_column: str,
        verbose: bool,
        mde_column: str,
    ) -> DataFrame:
        level_columns = get_remaning_groups(self._all_group_columns, groupby)
        other_levels = [
            other
            for other in self._sufficient_statistics.groupby(level_columns, sort=False).groups.keys()
            if other != level
        ]
        levels = [(level, other) for other in other_levels]
        difference_df = self._compute_differences(
            level_columns=level_columns,
            levels=levels,
            absolute=absolute,
            groupby=groupby,
            level_as_reference=level_as_reference,
            nims=nims,
            final_expected_sample_size_column=final_expected_sample_size_column,
            mde_column=mde_column,
        )
        return (
            difference_df
            if verbose
            else difference_df[
                listify(groupby)
                + [
                    "level_1",
                    "level_2",
                    "absolute_difference",
                    DIFFERENCE,
                    CI_LOWER,
                    CI_UPPER,
                    P_VALUE,
                    POWERED_EFFECT,
                    REQUIRED_SAMPLE_SIZE,
                ]
                + [ADJUSTED_LOWER, ADJUSTED_UPPER, ADJUSTED_P, IS_SIGNIFICANT]
                + ([NIM, NULL_HYPOTHESIS, PREFERENCE] if nims is not None else [])
            ]
        )

    def compute_differences(
        self,
        levels: List[Tuple],
        absolute: bool,
        groupby: Union[str, Iterable],
        nims: NIM_TYPE,
        final_expected_sample_size_column: str,
        verbose: bool,
        mde_column: str,
    ) -> DataFrame:
        level_columns = get_remaning_groups(self._all_group_columns, groupby)
        difference_df = self._compute_differences(
            level_columns=level_columns,
            levels=[levels] if isinstance(levels, tuple) else levels,
            absolute=absolute,
            groupby=groupby,
            level_as_reference=True,
            nims=nims,
            final_expected_sample_size_column=final_expected_sample_size_column,
            mde_column=mde_column,
        )
        return (
            difference_df
            if verbose
            else difference_df[
                listify(groupby)
                + ["level_1", "level_2", "absolute_difference", DIFFERENCE, CI_LOWER, CI_UPPER, P_VALUE]
                + [ADJUSTED_LOWER, ADJUSTED_UPPER, ADJUSTED_P, IS_SIGNIFICANT, POWERED_EFFECT, REQUIRED_SAMPLE_SIZE]
                + ([NIM, NULL_HYPOTHESIS, PREFERENCE] if nims is not None else [])
            ]
        )
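    # _compute_differences normalises levels to strings (level2str) so that
    # tuple-valued levels survive the string-based query below, computes all
    # requested pairwise comparisons, then maps the strings back at the end.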

    def _compute_differences(
        self,
        level_columns: Iterable,
        levels: Union[str, Iterable],
        absolute: bool,
        groupby: Union[str, Iterable],
        level_as_reference: bool,
        nims: NIM_TYPE,
        final_expected_sample_size_column: str,
        mde_column: str,
    ):
        if type(level_as_reference) is not bool:
            raise ValueError(f"level_as_reference must be either True or False, but is {level_as_reference}.")
        groupby = listify(groupby)
        unique_levels = set([lvl[0] for lvl in levels] + [lvl[1] for lvl in levels])
        validate_levels(self._sufficient_statistics, level_columns, unique_levels)
        str2level = {level2str(lv): lv for lv in unique_levels}
        levels = [
            (level2str(lvl[0]), level2str(lvl[1])) if level_as_reference else (level2str(lvl[1]), level2str(lvl[0]))
            for lvl in levels
        ]

        def assign_total_denominator(df, groupby):
            if self._denominator is None:
                return df.assign(**{f"current_total_{self._denominator}": None})

            if len(groupby) == 0:
                return df.assign(
                    **{f"current_total_{self._denominator}": self._sufficient_statistics[self._denominator].sum()}
                )
            else:
                return df.merge(
                    df.groupby(groupby, sort=False)[self._denominator]
                    .sum()
                    .reset_index()
                    .rename(columns={self._denominator: f"current_total_{self._denominator}"})
                )

        return (
            self._sufficient_statistics.assign(
                level=self._sufficient_statistics[level_columns].agg(level2str, axis="columns")
            )
            .pipe(assign_total_denominator, groupby)
            .query(f"level in {[l1 for l1, l2 in levels] + [l2 for l1, l2 in levels]}")
            .pipe(lambda df: df if groupby == [] else df.set_index(groupby))
            .pipe(
                self._create_comparison_df,
                groups_to_compare=levels,
                absolute=absolute,
                nims=nims,
                mde_column=mde_column,
                final_expected_sample_size_column=final_expected_sample_size_column,
            )
            .assign(level_1=lambda df: df["level_1"].map(lambda s: str2level[s]))
            .assign(level_2=lambda df: df["level_2"].map(lambda s: str2level[s]))
            .pipe(lambda df: df.reset_index([name for name in df.index.names if name is not None]))
            .reset_index(drop=True)
            .sort_values(by=groupby + ["level_1", "level_2"])
            .reset_index(drop=True)
        )
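    # _create_comparison_df pairs every requested (reference, treatment) row
    # via a self-join (on the shared index, or a cross join when there is no
    # index), attaches NIMs/MDEs and adjusted power, and then computes the
    # comparison statistics per group in parallel.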

    def _create_comparison_df(
        self,
        df: DataFrame,
        groups_to_compare: List[Tuple[str, str]],
        absolute: bool,
        nims: NIM_TYPE,
        mde_column: bool,
        final_expected_sample_size_column: str,
    ) -> DataFrame:
        def join(df: DataFrame) -> DataFrame:
            has_index = not all(idx is None for idx in df.index.names)
            if has_index:
                # self-join on index (the index will typically model the date,
                # i.e., rows with the same date are joined)
                return df.merge(df, left_index=True, right_index=True, suffixes=(SFX1, SFX2))
            else:
                # join on a dummy column, i.e. conduct a cross join
                return (
                    df.assign(dummy_join_column=1)
                    .merge(right=df.assign(dummy_join_column=1), on="dummy_join_column", suffixes=(SFX1, SFX2))
                    .drop(columns="dummy_join_column")
                )

        comparison_df = (
            df.pipe(add_nim_input_columns_from_tuple_or_dict, nims=nims, mde_column=mde_column)
            .pipe(
                add_nims_and_mdes,
                mde_column=mde_column,
                nim_column=NIM_COLUMN_DEFAULT,
                preferred_direction_column=PREFERRED_DIRECTION_COLUMN_DEFAULT,
            )
            .pipe(join)
            .query(
                "("
                + " or ".join([f"(level_1=='{l1}' and level_2=='{l2}')" for l1, l2 in groups_to_compare])
                + ")"
                + " and level_1 != level_2"
            )
            .pipe(
                validate_and_rename_columns,
                [NIM, mde_column, PREFERENCE, final_expected_sample_size_column, self._method_column],
            )
            .pipe(
                drop_and_rename_columns,
                [NULL_HYPOTHESIS, ALTERNATIVE_HYPOTHESIS, f"current_total_{self._denominator}"]
                + ([ORIGINAL_POINT_ESTIMATE] if ORIGINAL_POINT_ESTIMATE in df.columns else []),
            )
            .assign(**{PREFERENCE_TEST: lambda df: TWO_SIDED if self._correction_method == SPOT_1 else df[PREFERENCE]})
            .assign(**{POWER: self._power})
            .pipe(self._add_adjusted_power)
        )

        groups_except_ordinal = [
            column
            for column in df.index.names
            if column is not None
            and (column != self._ordinal_group_column or final_expected_sample_size_column is None)
        ]
        n_comparisons = self._get_num_comparisons(
            comparison_df,
            self._correction_method,
            number_of_level_comparisons=comparison_df.groupby(["level_1", "level_2"], sort=False).ngroups,
            groupby=groups_except_ordinal,
        )

        arg_dict = {
            NUMERATOR: self._numerator,
            NUMERATOR_SUM_OF_SQUARES: self._numerator_sumsq,
            DENOMINATOR: self._denominator,
            BOOTSTRAPS: self._bootstrap_samples_column,
            FINAL_EXPECTED_SAMPLE_SIZE: final_expected_sample_size_column,
            ORDINAL_GROUP_COLUMN: self._ordinal_group_column,
            MDE: mde_column,
            METHOD: self._method_column,
            CORRECTION_METHOD: self._correction_method,
            INTERVAL_SIZE: self._interval_size,
            ABSOLUTE: absolute,
            NUMBER_OF_COMPARISONS: n_comparisons,
        }
        comparison_df = groupbyApplyParallel(
            comparison_df.groupby(groups_except_ordinal + [self._method_column], as_index=False, sort=False),
            lambda df: _compute_comparisons(df, arg_dict=arg_dict),
        )
        return comparison_df
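    # compute_sample_size and compute_powered_effect share the same set-up
    # (_initialise_sample_size_and_power_computation): attach NIMs/MDEs and
    # adjusted power, then compute per group in parallel.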

    def compute_sample_size(
        self,
        treatment_weights: Iterable,
        mde_column: str,
        nim_column: str,
        preferred_direction_column: str,
        final_expected_sample_size_column: str,
    ) -> DataFrame:
        arg_dict, group_columns, sample_size_df = self._initialise_sample_size_and_power_computation(
            final_expected_sample_size_column, mde_column, nim_column, preferred_direction_column, treatment_weights
        )
        sample_size_df = groupbyApplyParallel(
            sample_size_df.pipe(set_alpha_and_adjust_preference, arg_dict=arg_dict).groupby(
                group_columns + [self._method_column],
                as_index=False,
                sort=False,
            ),
            lambda df: _compute_sample_sizes_and_ci_widths(df, arg_dict=arg_dict),
        )

        return sample_size_df.reset_index()

    def compute_powered_effect(
        self,
        treatment_weights: Iterable,
        mde_column: str,
        nim_column: str,
        preferred_direction_column: str,
        sample_size: float,
    ) -> DataFrame:
        arg_dict, group_columns, powered_effect_df = self._initialise_sample_size_and_power_computation(
            sample_size, mde_column, nim_column, preferred_direction_column, treatment_weights
        )
        powered_effect_df = groupbyApplyParallel(
            powered_effect_df.pipe(set_alpha_and_adjust_preference, arg_dict=arg_dict).groupby(
                group_columns + [self._method_column],
                as_index=False,
                sort=False,
            ),
            lambda df: _compute_powered_effects(df, arg_dict=arg_dict),
        )

        return powered_effect_df.reset_index()

    def _initialise_sample_size_and_power_computation(
        self, final_expected_sample_size_column, mde_column, nim_column, preferred_direction_column, treatment_weights
    ):
        sample_size_df = (
            self._sufficient_statistics.pipe(
                lambda df: df if self._all_group_columns == [] else df.set_index(self._all_group_columns)
            )
            .pipe(
                add_nims_and_mdes,
                mde_column=mde_column,
                nim_column=nim_column,
                preferred_direction_column=preferred_direction_column,
            )
            .assign(**{PREFERENCE_TEST: lambda df: TWO_SIDED if self._correction_method == SPOT_1 else df[PREFERENCE]})
            .assign(**{POWER: self._power})
            .pipe(self._add_adjusted_power)
        )
        group_columns = [column for column in sample_size_df.index.names if column is not None]
        n_comparisons = self._get_num_comparisons(
            sample_size_df,
            self._correction_method,
            number_of_level_comparisons=len(treatment_weights) - 1,
            groupby=group_columns,
        )
        arg_dict = {
            MDE: mde_column,
            METHOD: self._method_column,
            NUMBER_OF_COMPARISONS: n_comparisons,
            TREATMENT_WEIGHTS: treatment_weights,
            INTERVAL_SIZE: self._interval_size,
            CORRECTION_METHOD: self._correction_method,
            IS_BINARY: self._is_binary,
            FINAL_EXPECTED_SAMPLE_SIZE: final_expected_sample_size_column,
        }
        return arg_dict, group_columns, sample_size_df

    def compute_optimal_weights_and_sample_size(
        self, sample_size_df: DataFrame, number_of_groups: int
    ) -> Tuple[Iterable, int]:
        sample_size_df = (
            sample_size_df.reset_index(drop=True)
            .assign(**{OPTIMAL_KAPPA: lambda df: df.apply(_optimal_kappa, is_binary_column=self._is_binary, axis=1)})
            .assign(
                **{
                    OPTIMAL_WEIGHTS: lambda df: df.apply(
                        lambda row: _optimal_weights(row[OPTIMAL_KAPPA], number_of_groups), axis=1
                    )
                }
            )
        )

        group_columns = [column for column in sample_size_df.index.names if column is not None] + [self._method_column]
        arg_dict = {
            METHOD: self._method_column,
            IS_BINARY: self._is_binary,
        }
        return _find_optimal_group_weights_across_rows(sample_size_df, number_of_groups, group_columns, arg_dict)
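    # For correction methods that require metric info (e.g. the SPOT-1
    # family), power is adjusted for the number of guardrail metrics (those
    # with a NIM) plus, when present, one slot shared by all success metrics:
    # adjusted power = 1 - (1 - power) / power_correction.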

    def _add_adjusted_power(self, df: DataFrame) -> DataFrame:
        if self._correction_method in CORRECTION_METHODS_THAT_REQUIRE_METRIC_INFO:
            if self._metric_column is None:
                return df.assign(**{ADJUSTED_POWER: None})
            else:
                number_total_metrics = (
                    1 if self._single_metric else df.groupby(self._metric_column, sort=False).ngroups
                )
                if self._single_metric:
                    if df[df[NIM].isnull()].shape[0] > 0:
                        number_success_metrics = 1
                    else:
                        number_success_metrics = 0
                else:
                    number_success_metrics = df[df[NIM].isnull()].groupby(self._metric_column, sort=False).ngroups

                number_guardrail_metrics = number_total_metrics - number_success_metrics
                power_correction = (
                    number_guardrail_metrics if number_success_metrics == 0 else number_guardrail_metrics + 1
                )
                return df.assign(**{ADJUSTED_POWER: 1 - (1 - df[POWER]) / power_correction})
        else:
            return df.assign(**{ADJUSTED_POWER: df[POWER]})

    def achieved_power(self, level_1, level_2, mde, alpha, groupby):
        groupby = listify(groupby)
        level_columns = get_remaning_groups(self._all_group_columns, groupby)
        arg_dict = {NUMERATOR: self._numerator, DENOMINATOR: self._denominator}
        return (
            self._compute_differences(
                level_columns,
                [(level_1, level_2)],
                True,
                groupby,
                level_as_reference=True,
                nims=None,
                final_expected_sample_size_column=None,
                mde_column=None,
            )  # TODO: Is this right?
            .pipe(lambda df: df if groupby == [] else df.set_index(groupby))
            .assign(
                achieved_power=lambda df: df.apply(
                    lambda row: confidence_computers[row[self._method_column]].achieved_power(
                        row, mde=mde, alpha=alpha, arg_dict=arg_dict
                    ),
                    axis=1,
                )
            )
        )[["level_1", "level_2", "achieved_power"]]
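    # How many comparisons each correction method counts: Bonferroni variants
    # multiply level pairs by the number of groups (optionally counting only
    # two-sided tests); the multipletests-based methods correct at p-value
    # time, so the count here is 1; the SPOT-1 family counts success metrics
    # times segments.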

    def _get_num_comparisons(
        self, df: DataFrame, correction_method: str, number_of_level_comparisons: int, groupby: Iterable
    ) -> int:
        if correction_method == BONFERRONI:
            return max(
                1,
                number_of_level_comparisons * df.assign(_dummy_=1).groupby(groupby + ["_dummy_"], sort=False).ngroups,
            )
        elif correction_method == BONFERRONI_ONLY_COUNT_TWOSIDED:
            return max(
                number_of_level_comparisons
                * df.query(f'{PREFERENCE_TEST} == "{TWO_SIDED}"')
                .assign(_dummy_=1)
                .groupby(groupby + ["_dummy_"], sort=False)
                .ngroups,
                1,
            )
        elif correction_method in [
            HOLM,
            HOMMEL,
            SIMES_HOCHBERG,
            SIDAK,
            HOLM_SIDAK,
            FDR_BH,
            FDR_BY,
            FDR_TSBH,
            FDR_TSBKY,
        ]:
            return 1
        elif correction_method in [
            BONFERRONI_DO_NOT_COUNT_NON_INFERIORITY,
            SPOT_1,
            SPOT_1_HOLM,
            SPOT_1_HOMMEL,
            SPOT_1_SIMES_HOCHBERG,
            SPOT_1_SIDAK,
            SPOT_1_HOLM_SIDAK,
            SPOT_1_FDR_BH,
            SPOT_1_FDR_BY,
            SPOT_1_FDR_TSBH,
            SPOT_1_FDR_TSBKY,
        ]:
            if self._metric_column is None or self._treatment_column is None:
                return max(
                    1,
                    number_of_level_comparisons
                    * df[df[NIM].isnull()].assign(_dummy_=1).groupby(groupby + ["_dummy_"], sort=False).ngroups,
                )
            else:
                if self._single_metric:
                    if df[df[NIM].isnull()].shape[0] > 0:
                        number_success_metrics = 1
                    else:
                        number_success_metrics = 0
                else:
                    number_success_metrics = df[df[NIM].isnull()].groupby(self._metric_column, sort=False).ngroups

                number_segments = (
                    1
                    if len(self._segments) == 0 or not all(item in df.index.names for item in self._segments)
                    else df.groupby(self._segments, sort=False).ngroups
                )

                return max(1, number_of_level_comparisons * max(1, number_success_metrics) * number_segments)
        else:
            raise ValueError(f"Unsupported correction method: {correction_method}.")

def add_nim_input_columns_from_tuple_or_dict(df, nims: NIM_TYPE, mde_column: str) -> DataFrame:
    if type(nims) is tuple:
        return df.assign(**{NIM_COLUMN_DEFAULT: nims[0]}).assign(**{PREFERRED_DIRECTION_COLUMN_DEFAULT: nims[1]})
    elif type(nims) is dict:
        nim_values = {key: value[0] for key, value in nims.items()}
        nim_preferences = {key: value[1] for key, value in nims.items()}
        return df.assign(**{NIM_COLUMN_DEFAULT: lambda df: df.index.to_series().map(nim_values)}).assign(
            **{PREFERRED_DIRECTION_COLUMN_DEFAULT: lambda df: df.index.to_series().map(nim_preferences)}
        )
    elif nims is None:
        return df.assign(**{NIM_COLUMN_DEFAULT: None}).assign(
            **{
                PREFERRED_DIRECTION_COLUMN_DEFAULT: None
                if PREFERRED_DIRECTION_COLUMN_DEFAULT not in df or mde_column is None
                else df[PREFERRED_DIRECTION_COLUMN_DEFAULT]
            }
        )
    else:
        return df
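# Signs the null and alternative hypotheses relative to the point estimate:
# NIMs and MDEs are given as relative margins, so the signed bound becomes
# nim * point_estimate, with the sign flipped when an increase is preferred
# ("larger"/"smaller" map to the one-sided test directions).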

def add_nims_and_mdes(df: DataFrame, mde_column: str, nim_column: str, preferred_direction_column: str) -> DataFrame:
    def _set_nims_and_mdes(grp: DataFrame) -> DataFrame:
        nim = grp[nim_column].astype(float)
        input_preference = grp[preferred_direction_column].values[0]
        mde = None if mde_column is None else grp[mde_column]

        nim_is_na = nim.isna().all()
        mde_is_na = True if mde is None else mde.isna().all()
        if input_preference is None or (type(input_preference) is float and isnan(input_preference)):
            signed_nim = 0.0 if nim_is_na else nim * grp[POINT_ESTIMATE]
            preference = TWO_SIDED
            signed_mde = None if mde_is_na else mde * grp[POINT_ESTIMATE]
        elif input_preference.lower() == INCREASE_PREFFERED:
            signed_nim = 0.0 if nim_is_na else -nim * grp[POINT_ESTIMATE]
            preference = "larger"
            signed_mde = None if mde_is_na else mde * grp[POINT_ESTIMATE]
        elif input_preference.lower() == DECREASE_PREFFERED:
            signed_nim = 0.0 if nim_is_na else nim * grp[POINT_ESTIMATE]
            preference = "smaller"
            signed_mde = None if mde_is_na else -mde * grp[POINT_ESTIMATE]
        else:
            raise ValueError(f"{input_preference.lower()} not in {[INCREASE_PREFFERED, DECREASE_PREFFERED]}")

        return (
            grp.assign(**{NIM: nim})
            .assign(**{PREFERENCE: preference})
            .assign(**{NULL_HYPOTHESIS: signed_nim})
            .assign(**{ALTERNATIVE_HYPOTHESIS: signed_mde if nim_is_na else 0.0})
        )

    index_names = [name for name in df.index.names if name is not None]
    return (
        df.groupby(
            [nim_column, preferred_direction_column] + listify(mde_column), dropna=False, as_index=False, sort=False
        )
        .apply(_set_nims_and_mdes)
        .pipe(lambda df: df.reset_index(index_names))
        .reset_index(drop=True)
        .pipe(lambda df: df if index_names == [] else df.set_index(index_names))
    )

def _compute_comparisons(df: DataFrame, arg_dict: Dict) -> DataFrame:
    return (
        df.assign(**{DIFFERENCE: lambda df: df[POINT_ESTIMATE + SFX2] - df[POINT_ESTIMATE + SFX1]})
        .assign(**{STD_ERR: confidence_computers[df[arg_dict[METHOD]].values[0]].std_err(df, arg_dict)})
        .pipe(_add_p_value_and_ci, arg_dict=arg_dict)
        .pipe(_powered_effect_and_required_sample_size_from_difference_df, arg_dict=arg_dict)
        .pipe(_adjust_if_absolute, absolute=arg_dict[ABSOLUTE])
        .assign(**{PREFERENCE: lambda df: df[PREFERENCE].map(PREFERENCE_DICT)})
        .pipe(_add_variance_reduction_rate, arg_dict=arg_dict)
    )

def _add_variance_reduction_rate(df: DataFrame, arg_dict: Dict) -> DataFrame:
    denominator = arg_dict[DENOMINATOR]
    method_column = arg_dict[METHOD]
    if (df[method_column] == ZTESTLINREG).any():
        variance_no_reduction = (
            df[ORIGINAL_VARIANCE + SFX1] / df[denominator + SFX1]
            + df[ORIGINAL_VARIANCE + SFX2] / df[denominator + SFX2]
        )
        variance_w_reduction = (
            df[VARIANCE + SFX1] / df[denominator + SFX1] + df[VARIANCE + SFX2] / df[denominator + SFX2]
        )
        df = df.assign(**{VARIANCE_REDUCTION: 1 - np.divide(variance_w_reduction, variance_no_reduction)})
    return df
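# p-value / CI adjustment has three regimes: sequential tests (when an
# expected final sample size is given) take an adjusted alpha from the z-test
# computer; Holm/Hommel/Sidak/FDR-style methods go through statsmodels'
# multipletests; the Bonferroni family divides alpha by the number of
# comparisons directly.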

def _add_p_value_and_ci(df: DataFrame, arg_dict: Dict) -> DataFrame:
    def _add_adjusted_p_and_is_significant(df: DataFrame, arg_dict: Dict) -> DataFrame:
        n_comparisons = arg_dict[NUMBER_OF_COMPARISONS]
        if arg_dict[FINAL_EXPECTED_SAMPLE_SIZE] is not None:
            if arg_dict[CORRECTION_METHOD] not in [
                BONFERRONI,
                BONFERRONI_ONLY_COUNT_TWOSIDED,
                BONFERRONI_DO_NOT_COUNT_NON_INFERIORITY,
                SPOT_1,
            ]:
                raise ValueError(
                    f"{arg_dict[CORRECTION_METHOD]} not supported for sequential tests. Use one of "
                    f"{BONFERRONI}, {BONFERRONI_ONLY_COUNT_TWOSIDED}, "
                    f"{BONFERRONI_DO_NOT_COUNT_NON_INFERIORITY}, {SPOT_1}"
                )
            adjusted_alpha = _compute_sequential_adjusted_alpha(df, arg_dict[METHOD], arg_dict)
            df = df.merge(adjusted_alpha, left_index=True, right_index=True)
            df[IS_SIGNIFICANT] = df[P_VALUE] < df[ADJUSTED_ALPHA]
            df[P_VALUE] = None
            df[ADJUSTED_P] = None
        elif arg_dict[CORRECTION_METHOD] in [
            HOLM,
            HOMMEL,
            SIMES_HOCHBERG,
            SIDAK,
            HOLM_SIDAK,
            FDR_BH,
            FDR_BY,
            FDR_TSBH,
            FDR_TSBKY,
            SPOT_1_HOLM,
            SPOT_1_HOMMEL,
            SPOT_1_SIMES_HOCHBERG,
            SPOT_1_SIDAK,
            SPOT_1_HOLM_SIDAK,
            SPOT_1_FDR_BH,
            SPOT_1_FDR_BY,
            SPOT_1_FDR_TSBH,
            SPOT_1_FDR_TSBKY,
        ]:
            if arg_dict[CORRECTION_METHOD].startswith("spot-"):
                # strip the "spot-1-" prefix to recover the underlying
                # statsmodels correction method name
                correction_method = arg_dict[CORRECTION_METHOD][7:]
            else:
                correction_method = arg_dict[CORRECTION_METHOD]
            df[ADJUSTED_ALPHA] = df[ALPHA] / n_comparisons
            is_significant, adjusted_p, _, _ = multipletests(
                pvals=df[P_VALUE], alpha=1 - arg_dict[INTERVAL_SIZE], method=correction_method
            )
            df[ADJUSTED_P] = adjusted_p
            df[IS_SIGNIFICANT] = is_significant
        elif arg_dict[CORRECTION_METHOD] in [
            BONFERRONI,
            BONFERRONI_ONLY_COUNT_TWOSIDED,
            BONFERRONI_DO_NOT_COUNT_NON_INFERIORITY,
            SPOT_1,
        ]:
            df[ADJUSTED_ALPHA] = df[ALPHA] / n_comparisons
            df[ADJUSTED_P] = df[P_VALUE].map(lambda p: min(p * n_comparisons, 1))
            df[IS_SIGNIFICANT] = df[P_VALUE] < df[ADJUSTED_ALPHA]
        else:
            raise ValueError(f"Unsupported correction method: {arg_dict[CORRECTION_METHOD]}.")

        return df

    def _compute_sequential_adjusted_alpha(df: DataFrame, method_column: str, arg_dict: Dict) -> Series:
        if all(df[method_column] == "z-test"):
            return confidence_computers["z-test"].compute_sequential_adjusted_alpha(df, arg_dict)
        else:
            raise NotImplementedError("Sequential testing is only supported for z-tests")

    def _add_ci(df: DataFrame, arg_dict: Dict) -> DataFrame:
        lower, upper = confidence_computers[df[arg_dict[METHOD]].values[0]].ci(df, ALPHA, arg_dict)

        if (
            arg_dict[CORRECTION_METHOD]
            in [
                HOLM,
                HOMMEL,
                SIMES_HOCHBERG,
                SPOT_1_HOLM,
                SPOT_1_HOMMEL,
                SPOT_1_SIMES_HOCHBERG,
            ]
            and all(df[PREFERENCE_TEST] != TWO_SIDED)
        ):
            if all(df[arg_dict[METHOD]] == "z-test"):
                adjusted_lower, adjusted_upper = confidence_computers["z-test"].ci_for_multiple_comparison_methods(
                    df, arg_dict[CORRECTION_METHOD], alpha=1 - arg_dict[INTERVAL_SIZE]
                )
            else:
                raise NotImplementedError(f"{arg_dict[CORRECTION_METHOD]} is only supported for ZTests")
        elif arg_dict[CORRECTION_METHOD] in [
            BONFERRONI,
            BONFERRONI_ONLY_COUNT_TWOSIDED,
            BONFERRONI_DO_NOT_COUNT_NON_INFERIORITY,
            SPOT_1,
            SPOT_1_HOLM,
            SPOT_1_HOMMEL,
            SPOT_1_SIMES_HOCHBERG,
            SPOT_1_SIDAK,
            SPOT_1_HOLM_SIDAK,
            SPOT_1_FDR_BH,
            SPOT_1_FDR_BY,
            SPOT_1_FDR_TSBH,
            SPOT_1_FDR_TSBKY,
        ]:
            adjusted_lower, adjusted_upper = confidence_computers[df[arg_dict[METHOD]].values[0]].ci(
                df, ADJUSTED_ALPHA, arg_dict
            )
        else:
            warn(f"Confidence intervals not supported for {arg_dict[CORRECTION_METHOD]}")
            adjusted_lower = None
            adjusted_upper = None

        return (
            df.assign(**{CI_LOWER: lower})
            .assign(**{CI_UPPER: upper})
            .assign(**{ADJUSTED_LOWER: adjusted_lower})
            .assign(**{ADJUSTED_UPPER: adjusted_upper})
        )

    return (
        df.pipe(set_alpha_and_adjust_preference, arg_dict=arg_dict)
        .assign(**{P_VALUE: lambda df: df.pipe(_p_value, arg_dict=arg_dict)})
        .pipe(_add_adjusted_p_and_is_significant, arg_dict=arg_dict)
        .pipe(_add_ci, arg_dict=arg_dict)
    )

def set_alpha_and_adjust_preference(df: DataFrame, arg_dict: Dict) -> DataFrame:
    alpha_0 = 1 - arg_dict[INTERVAL_SIZE]
    return df.assign(
        **{
            ALPHA: df.apply(
                lambda row: 2 * alpha_0
                if arg_dict[CORRECTION_METHOD] == SPOT_1 and row[PREFERENCE] != TWO_SIDED
                else alpha_0,
                axis=1,
            )
        }
    ).assign(**{ADJUSTED_ALPHA_POWER_SAMPLE_SIZE: lambda df: df[ALPHA] / arg_dict[NUMBER_OF_COMPARISONS]})
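# When relative differences are requested, the difference, CI bounds, null
# hypothesis and powered effect are all divided by the control group's point
# estimate (SFX1 marks the reference side of the self-join).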

def _adjust_if_absolute(df: DataFrame, absolute: bool) -> DataFrame:
    if absolute:
        return df.assign(absolute_difference=absolute)
    else:
        return (
            df.assign(absolute_difference=absolute)
            .assign(**{DIFFERENCE: df[DIFFERENCE] / df[POINT_ESTIMATE + SFX1]})
            .assign(**{CI_LOWER: df[CI_LOWER] / df[POINT_ESTIMATE + SFX1]})
            .assign(**{CI_UPPER: df[CI_UPPER] / df[POINT_ESTIMATE + SFX1]})
            .assign(**{ADJUSTED_LOWER: df[ADJUSTED_LOWER] / df[POINT_ESTIMATE + SFX1]})
            .assign(**{ADJUSTED_UPPER: df[ADJUSTED_UPPER] / df[POINT_ESTIMATE + SFX1]})
            .assign(**{NULL_HYPOTHESIS: df[NULL_HYPOTHESIS] / df[POINT_ESTIMATE + SFX1]})
            .assign(**{POWERED_EFFECT: df[POWERED_EFFECT] / df[POINT_ESTIMATE + SFX1]})
        )

def _p_value(df: DataFrame, arg_dict: Dict) -> float:
    if df[arg_dict[METHOD]].values[0] == CHI2 and (df[NIM].notna()).any():
        raise ValueError("Non-inferiority margins not supported in ChiSquared. Use StudentsTTest or ZTest instead.")
    return confidence_computers[df[arg_dict[METHOD]].values[0]].p_value(df, arg_dict)

def _powered_effect_and_required_sample_size_from_difference_df(df: DataFrame, arg_dict: Dict) -> DataFrame:
    if df[arg_dict[METHOD]].values[0] not in [ZTEST, ZTESTLINREG] and arg_dict[MDE] in df:
        raise ValueError("Minimum detectable effects only supported for ZTest.")
    elif df[arg_dict[METHOD]].values[0] not in [ZTEST, ZTESTLINREG] or (df[ADJUSTED_POWER].isna()).any():
        df[POWERED_EFFECT] = None
        df[REQUIRED_SAMPLE_SIZE] = None
        df[REQUIRED_SAMPLE_SIZE_METRIC] = None
        return df
    else:
        n1, n2 = df[arg_dict[DENOMINATOR] + SFX1], df[arg_dict[DENOMINATOR] + SFX2]
        kappa = n1 / n2
        binary = (df[arg_dict[NUMERATOR_SUM_OF_SQUARES] + SFX1] == df[arg_dict[NUMERATOR] + SFX1]).all()
        proportion_of_total = (n1 + n2) / df[f"current_total_{arg_dict[DENOMINATOR]}"]

        z_alpha = st.norm.ppf(
            1
            - df[ADJUSTED_ALPHA_POWER_SAMPLE_SIZE].values[0] / (2 if df[PREFERENCE_TEST].values[0] == TWO_SIDED else 1)
        )
        z_power = st.norm.ppf(df[ADJUSTED_POWER].values[0])

        # A non-inferiority margin is present only when NIM is a non-NaN
        # float; None (or NaN) means a plain superiority test.
        nim = df[NIM].values[0]
        non_inferiority = isinstance(nim, float) and not isnan(nim)

        df[POWERED_EFFECT] = confidence_computers[df[arg_dict[METHOD]].values[0]].powered_effect(
            df=df.assign(kappa=kappa)
            .assign(current_number_of_units=df[f"current_total_{arg_dict[DENOMINATOR]}"])
            .assign(proportion_of_total=proportion_of_total),
            z_alpha=z_alpha,
            z_power=z_power,
            binary=binary,
            non_inferiority=non_inferiority,
            avg_column=POINT_ESTIMATE + SFX1,
            var_column=VARIANCE + SFX1,
        )

        if ALTERNATIVE_HYPOTHESIS in df and NULL_HYPOTHESIS in df and (df[ALTERNATIVE_HYPOTHESIS].notna()).all():
            df[REQUIRED_SAMPLE_SIZE] = confidence_computers[df[arg_dict[METHOD]].values[0]].required_sample_size(
                proportion_of_total=1,
                z_alpha=z_alpha,
                z_power=z_power,
                binary=binary,
                non_inferiority=non_inferiority,
                hypothetical_effect=df[ALTERNATIVE_HYPOTHESIS] - df[NULL_HYPOTHESIS],
                control_avg=df[POINT_ESTIMATE + SFX1],
                control_var=df[VARIANCE + SFX1],
                kappa=kappa,
            )
            df[REQUIRED_SAMPLE_SIZE_METRIC] = confidence_computers[
                df[arg_dict[METHOD]].values[0]
            ].required_sample_size(
                proportion_of_total=proportion_of_total,
                z_alpha=z_alpha,
                z_power=z_power,
                binary=binary,
                non_inferiority=non_inferiority,
                hypothetical_effect=df[ALTERNATIVE_HYPOTHESIS] - df[NULL_HYPOTHESIS],
                control_avg=df[POINT_ESTIMATE + SFX1],
                control_var=df[VARIANCE + SFX1],
                kappa=kappa,
            )
        else:
            df[REQUIRED_SAMPLE_SIZE] = None
            df[REQUIRED_SAMPLE_SIZE_METRIC] = None

        return df

def _compute_sample_sizes_and_ci_widths(df: DataFrame, arg_dict: Dict) -> DataFrame:
    return df.pipe(_sample_size_from_summary_df, arg_dict=arg_dict).pipe(_ci_width, arg_dict=arg_dict)

def _sample_size_from_summary_df(df: DataFrame, arg_dict: Dict) -> DataFrame:
    # NOTE: the original condition ``!= ZTEST in df`` chains to
    # ``(method != ZTEST) and (ZTEST in df)``; it is spelled out here to
    # preserve that behaviour.
    if df[arg_dict[METHOD]].values[0] != ZTEST and ZTEST in df:
        raise ValueError("Sample size calculation only supported for ZTest.")
    elif df[arg_dict[METHOD]].values[0] != ZTEST or (df[ADJUSTED_POWER].isna()).any():
        df[REQUIRED_SAMPLE_SIZE_METRIC] = None
    else:
        all_weights = arg_dict[TREATMENT_WEIGHTS]
        control_weight, treatment_weights = all_weights[0], all_weights[1:]

        binary = df[arg_dict[IS_BINARY]].values[0]
        z_alpha = st.norm.ppf(
            1
            - df[ADJUSTED_ALPHA_POWER_SAMPLE_SIZE].values[0] / (2 if df[PREFERENCE_TEST].values[0] == TWO_SIDED else 1)
        )
        z_power = st.norm.ppf(df[ADJUSTED_POWER].values[0])
        non_inferiority = is_non_inferiority(df[NIM].values[0])

        max_sample_size = 0
        for treatment_weight in treatment_weights:
            kappa = control_weight / treatment_weight
            proportion_of_total = (control_weight + treatment_weight) / sum(all_weights)

            if ALTERNATIVE_HYPOTHESIS in df and NULL_HYPOTHESIS in df and (df[ALTERNATIVE_HYPOTHESIS].notna()).all():
                this_sample_size = confidence_computers[df[arg_dict[METHOD]].values[0]].required_sample_size(
                    proportion_of_total=proportion_of_total,
                    z_alpha=z_alpha,
                    z_power=z_power,
                    binary=binary,
                    non_inferiority=non_inferiority,
                    hypothetical_effect=df[ALTERNATIVE_HYPOTHESIS] - df[NULL_HYPOTHESIS],
                    control_avg=df[POINT_ESTIMATE],
                    control_var=df[VARIANCE],
                    kappa=kappa,
                )
                max_sample_size = max(this_sample_size.max(), max_sample_size)

        df[REQUIRED_SAMPLE_SIZE_METRIC] = None if max_sample_size == 0 else max_sample_size

    return df

def _compute_powered_effects(df: DataFrame, arg_dict: Dict) -> DataFrame:
    return df.pipe(_powered_effect_from_summary_df, arg_dict=arg_dict)

1139 

1140def _powered_effect_from_summary_df(df: DataFrame, arg_dict: Dict) -> DataFrame: 

1141 if df[arg_dict[METHOD]].values[0] != ZTEST in df: 

1142 raise ValueError("Powered effect calculation only supported for ZTest.") 

1143 elif df[arg_dict[METHOD]].values[0] != ZTEST or (df[ADJUSTED_POWER].isna()).any(): 

1144 df[REQUIRED_SAMPLE_SIZE_METRIC] = None 

1145 else: 

1146 all_weights = arg_dict[TREATMENT_WEIGHTS] 

1147 control_weight, treatment_weights = all_weights[0], all_weights[1:] 

1148 

1149 current_number_of_units = arg_dict[FINAL_EXPECTED_SAMPLE_SIZE] 

1150 

1151 binary = df[arg_dict[IS_BINARY]].values[0] 

1152 z_alpha = st.norm.ppf( 

1153 1 

1154 - df[ADJUSTED_ALPHA_POWER_SAMPLE_SIZE].values[0] / (2 if df[PREFERENCE_TEST].values[0] == TWO_SIDED else 1) 

1155 ) 

1156 z_power = st.norm.ppf(df[ADJUSTED_POWER].values[0]) 

1157 non_inferiority = is_non_inferiority(df[NIM].values[0]) 

1158 

1159 max_powered_effect = 0 

1160 for treatment_weight in treatment_weights: 

1161 kappa = control_weight / treatment_weight 

1162 proportion_of_total = (control_weight + treatment_weight) / sum(all_weights) 

1163 

1164 this_powered_effect = df[POWERED_EFFECT] = confidence_computers[ 

1165 df[arg_dict[METHOD]].values[0] 

1166 ].powered_effect( 

1167 df=df.assign(kappa=kappa) 

1168 .assign(current_number_of_units=current_number_of_units) 

1169 .assign(proportion_of_total=proportion_of_total), 

1170 z_alpha=z_alpha, 

1171 z_power=z_power, 

1172 binary=binary, 

1173 non_inferiority=non_inferiority, 

1174 avg_column=POINT_ESTIMATE, 

1175 var_column=VARIANCE, 

1176 ) 

1177 

1178 max_powered_effect = max(this_powered_effect.max(), max_powered_effect) 

1179 

1180 df[POWERED_EFFECT] = None if max_powered_effect == 0 else max_powered_effect 

1181 

1182 return df 

def _ci_width(df: DataFrame, arg_dict: Dict) -> DataFrame:
    expected_sample_size = (
        None if arg_dict[FINAL_EXPECTED_SAMPLE_SIZE] is None else df[arg_dict[FINAL_EXPECTED_SAMPLE_SIZE]].values[0]
    )
    if expected_sample_size is None or np.isnan(expected_sample_size):
        return df.assign(**{CI_WIDTH: None})

    all_weights = arg_dict[TREATMENT_WEIGHTS]
    control_weight, treatment_weights = all_weights[0], all_weights[1:]
    sum_of_weights = sum(all_weights)

    control_count = int((control_weight / sum_of_weights) * expected_sample_size)
    if control_count == 0:
        return df.assign(**{CI_WIDTH: float("inf")})

    else:
        binary = df[arg_dict[IS_BINARY]].values[0]
        z_alpha = st.norm.ppf(
            1
            - df[ADJUSTED_ALPHA_POWER_SAMPLE_SIZE].values[0] / (2 if df[PREFERENCE_TEST].values[0] == TWO_SIDED else 1)
        )

        non_inferiority = is_non_inferiority(df[NIM].values[0])
        max_ci_width = 0
        for treatment_weight in treatment_weights:
            treatment_count = int((treatment_weight / sum_of_weights) * expected_sample_size)
            if treatment_count == 0:
                return df.assign(**{CI_WIDTH: float("inf")})
            else:
                comparison_ci_width = confidence_computers[df[arg_dict[METHOD]].values[0]].ci_width(
                    z_alpha=z_alpha,
                    binary=binary,
                    non_inferiority=non_inferiority,
                    hypothetical_effect=df[ALTERNATIVE_HYPOTHESIS] - df[NULL_HYPOTHESIS],
                    control_avg=df[POINT_ESTIMATE],
                    control_var=df[VARIANCE],
                    control_count=control_count,
                    treatment_count=treatment_count,
                )

                max_ci_width = max(comparison_ci_width.max(), max_ci_width)

        df[CI_WIDTH] = None if max_ci_width == 0 else max_ci_width

    return df
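# Optimal control/treatment allocation ratio (kappa). For binary superiority
# tests the variance under the alternative differs from the control variance,
# so kappa = sqrt(var(p) / var(p + effect)); otherwise equal allocation
# (kappa = 1) is used.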

def _optimal_kappa(row: Series, is_binary_column) -> float:
    def _binary_variance(p: float) -> float:
        return p * (1 - p)

    if row[is_binary_column]:
        if is_non_inferiority(row[NIM]):
            return 1.0
        else:
            if row[POINT_ESTIMATE] == 0.0:
                # variance will be 0 as well in this case. This if-branch is
                # important to avoid divide-by-zero problems.
                return 1.0
            else:
                hypothetical_effect = row[ALTERNATIVE_HYPOTHESIS] - row[NULL_HYPOTHESIS]
                return np.sqrt(
                    _binary_variance(row[POINT_ESTIMATE]) / _binary_variance(row[POINT_ESTIMATE] + hypothetical_effect)
                )
    else:
        return 1.0

def _optimal_weights(kappa: float, number_of_groups) -> Iterable:
    treatment_weight = 1 / (kappa + number_of_groups - 1)
    control_weight = kappa * treatment_weight
    return [control_weight] + [treatment_weight for _ in range(number_of_groups - 1)]
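# When metrics disagree on the optimal kappa, grid-search 100 kappas between
# the smallest and largest per-metric optimum and pick the weights that
# minimise the largest required sample size across metrics.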

def _find_optimal_group_weights_across_rows(
    df: DataFrame, group_count: int, group_columns: Iterable, arg_dict: Dict
) -> Tuple[List[float], int]:
    min_kappa = min(df[OPTIMAL_KAPPA])
    max_kappa = max(df[OPTIMAL_KAPPA])

    if min_kappa == max_kappa:
        optimal_weights = df[OPTIMAL_WEIGHTS][0]
        optimal_sample_size = _calculate_optimal_sample_size_given_weights(
            df, optimal_weights, group_columns, arg_dict
        )
        return optimal_weights, optimal_sample_size

    in_between_kappas = np.linspace(min_kappa, max_kappa, 100)
    min_optimal_sample_size = float("inf")
    optimal_weights = []
    for kappa in in_between_kappas:
        weights = _optimal_weights(kappa, group_count)
        optimal_sample_size = _calculate_optimal_sample_size_given_weights(df, weights, group_columns, arg_dict)
        if optimal_sample_size is not None and optimal_sample_size < min_optimal_sample_size:
            min_optimal_sample_size = optimal_sample_size
            optimal_weights = weights
    min_optimal_sample_size = np.nan if min_optimal_sample_size == 0 else min_optimal_sample_size
    return optimal_weights, min_optimal_sample_size

def _calculate_optimal_sample_size_given_weights(
    df: DataFrame, optimal_weights: List[float], group_columns: Iterable, arg_dict: Dict
) -> int:
    arg_dict[TREATMENT_WEIGHTS] = optimal_weights
    sample_size_df = groupbyApplyParallel(
        df.groupby(group_columns, as_index=False, sort=False),
        lambda df: _sample_size_from_summary_df(df, arg_dict=arg_dict),
    )

    if sample_size_df[REQUIRED_SAMPLE_SIZE_METRIC].isna().all():
        return None
    optimal_sample_size = sample_size_df[REQUIRED_SAMPLE_SIZE_METRIC].max()

    return np.ceil(optimal_sample_size) if np.isfinite(optimal_sample_size) else optimal_sample_size