# Admission outcomes for every (department, gender) cell, stored column-wise:
# the i-th entry of each list describes the same cell.
# NOTE(review): presumably the 1973 UC Berkeley graduate admissions data --
# confirm the provenance before citing it.
ucb_counts = pd.DataFrame(
    {
        "department": ["A", "A", "B", "B", "C", "C", "D", "D", "E", "E", "F", "F"],
        "gender": ["Male", "Female"] * 6,
        "admitted": [512, 89, 353, 17, 120, 202, 138, 131, 53, 94, 22, 24],
        "rejected": [313, 19, 207, 8, 205, 391, 279, 244, 138, 299, 351, 317],
    }
).assign(
    # Keyword order matters: admit_rate reads the applications column that the
    # preceding keyword has just created.
    applications=lambda d: d["admitted"] + d["rejected"],
    admit_rate=lambda d: d["admitted"] / d["applications"],
)

# Pooled view: add up the counts over all departments first, then take the
# rate, so each gender's rate is weighted by where its applicants applied.
pooled = ucb_counts.groupby("gender")[["admitted", "applications"]].agg("sum")
pooled["admit_rate"] = pooled["admitted"].div(pooled["applications"])

# Positive value => men were admitted at a higher overall rate than women.
pooled_diff = pooled.at["Male", "admit_rate"] - pooled.at["Female", "admit_rate"]

# Wide table of per-department admit rates: one row per department, one
# column per gender, plus the within-department male-minus-female gap.
dept_rates = (
    ucb_counts.set_index(["department", "gender"])["admit_rate"].unstack("gender")
)
dept_rates["male_minus_female"] = dept_rates["Male"].sub(dept_rates["Female"])

# Stack the pooled gap on top of the per-department gaps for side-by-side
# comparison. Deliberately left as a bare expression (not assigned): it is
# only displayed when it is the last statement of a notebook cell / REPL input.
pd.concat(
    [
        pd.Series([pooled_diff], index=["Pooled"], name="male_minus_female").to_frame(),
        dept_rates.loc[:, ["male_minus_female"]],
    ]
)