#!/usr/bin/env python3
# Timestamp: "2025-10-01 16:30:00 (ywatanabe)"
# File: scitex_stats/tests/categorical/_test_mcnemar.py
# ----------------------------------------
r"""McNemar's test for paired categorical data.
Functionalities:
- McNemar's test for paired categorical data
- Test for marginal homogeneity in 2×2 contingency tables
- Compute odds ratio for matched pairs
- Generate visualizations for paired outcomes
Dependencies:
- packages: numpy, pandas, scipy, matplotlib
IO:
- input: 2×2 contingency table (array or DataFrame)
- output: Test results (dict or DataFrame) and optional figure
"""
from __future__ import annotations
import os
from typing import Literal, Optional, Union
import matplotlib.axes
import numpy as np
import pandas as pd
import scitex as stx
from scipy import stats
from scitex_stats._logging import getLogger
from scitex_stats._utils._formatters import p2stars
from scitex_stats._utils._normalizers import convert_results, force_dataframe
__FILE__ = __file__
__DIR__ = os.path.dirname(__FILE__)
logger = getLogger(__name__)
def mcnemar_odds_ratio(b: int, c: int) -> float:
"""
Compute odds ratio for McNemar's test from discordant pairs.
Parameters
----------
b : int
Count in cell [0,1] (success before, failure after)
c : int
Count in cell [1,0] (failure before, success after)
Returns
-------
or_val : float
Odds ratio = b / c
Notes
-----
For McNemar's test, OR = b/c where b and c are the discordant pairs.
OR > 1 indicates more b than c (positive change)
OR < 1 indicates more c than b (negative change)
OR = 1 indicates no change
"""
if c == 0:
if b == 0:
return 1.0 # No discordant pairs
return float("inf") # Only b discordant pairs
return float(b / c)
def interpret_mcnemar_or(or_val: float) -> str:
"""
Interpret McNemar's odds ratio.
Parameters
----------
or_val : float
Odds ratio
Returns
-------
interpretation : str
Interpretation
"""
if or_val == 1.0:
return "no change"
elif or_val > 1.0:
if or_val < 2.0:
return "small increase"
elif or_val < 4.0:
return "medium increase"
else:
return "large increase"
else: # or_val < 1.0
if or_val > 0.5:
return "small decrease"
elif or_val > 0.25:
return "medium decrease"
else:
return "large decrease"
[docs]
def test_mcnemar( # noqa: C901
observed: Union[np.ndarray, pd.DataFrame, list],
var_before: Optional[str] = None,
var_after: Optional[str] = None,
correction: bool = True,
alpha: float = 0.05,
plot: bool = False,
ax: Optional[matplotlib.axes.Axes] = None,
return_as: Literal["dict", "dataframe"] = "dict",
decimals: int = 3,
verbose: bool = False,
) -> Union[dict, pd.DataFrame]:
r"""
Perform McNemar's test for paired categorical data.
Tests whether there is a significant change in proportions for paired binary data.
Appropriate for before-after studies with binary outcomes.
Parameters
----------
observed : array-like, shape (2, 2)
2×2 contingency table:
[[a, b],
[c, d]]
where:
- a: both conditions negative (0,0)
- b: before negative, after positive (0,1)
- c: before positive, after negative (1,0)
- d: both conditions positive (1,1)
var_before : str, optional
Name for before condition
var_after : str, optional
Name for after condition
correction : bool, default True
Whether to apply continuity correction (recommended for small samples)
alpha : float, default 0.05
Significance level
plot : bool, default False
Whether to generate visualization
ax : matplotlib.axes.Axes, optional
Axes to plot on. If provided, plot is set to True
return_as : {'dict', 'dataframe'}, default 'dict'
Output format
decimals : int, default 3
Number of decimal places for rounding
verbose : bool, default False
If True, print test results to logger
Returns
-------
result : dict or DataFrame
Test results including:
- test_method: Name of test
- statistic: χ² statistic
- pvalue: p-value
- df: degrees of freedom (always 1)
- b: count of (before=0, after=1)
- c: count of (before=1, after=0)
- odds_ratio: b / c
- effect_size: odds ratio
- effect_size_interpretation: interpretation
- significant: whether to reject null hypothesis
- stars: significance stars
Notes
-----
McNemar's test is used for paired nominal data, testing whether row and column
marginal frequencies are equal (marginal homogeneity).
The test statistic is based on the discordant pairs (b and c):
.. math::
\\chi^2 = \\frac{(b - c)^2}{b + c} \\quad \\text{(without correction)}
.. math::
\\chi^2 = \\frac{(|b - c| - 1)^2}{b + c} \\quad \\text{(with correction)}
**Null hypothesis:** The marginal proportions are equal (no change)
**Alternative:** The marginal proportions differ (significant change)
**Assumptions:**
- Paired data (matched observations)
- Binary outcomes for both conditions
- Large enough sample (b + c ≥ 10 recommended for chi-square approximation)
**Effect size (Odds Ratio):**
OR = b / c
- OR = 1: no change
- OR > 1: increase (more transitions from 0→1 than 1→0)
- OR < 1: decrease (more transitions from 1→0 than 0→1)
Examples
--------
>>> import numpy as np
>>> from scitex_stats.tests.categorical import test_mcnemar
>>>
>>> # Example: Treatment effectiveness (before/after)
>>> # Rows: before, Columns: after
>>> # [[no→no, no→yes],
>>> # [yes→no, yes→yes]]
>>> observed = [[59, 6], # 59 stayed negative, 6 improved
... [16, 19]] # 16 relapsed, 19 stayed positive
>>>
>>> result = test_mcnemar(observed, var_before='Before Treatment',
... var_after='After Treatment', plot=True)
>>> print(f"χ² = {result['statistic']:.2f}, p = {result['pvalue']:.4f}")
>>> print(f"Odds Ratio = {result['odds_ratio']:.2f}")
References
----------
.. [1] McNemar, Q. (1947). "Note on the sampling error of the difference between
correlated proportions or percentages". Psychometrika, 12(2), 153-157.
.. [2] Edwards, A. L. (1948). "Note on the correction for continuity in testing
the significance of the difference between correlated proportions".
Psychometrika, 13(3), 185-187.
See Also
--------
test_chi2 : For independent (unpaired) categorical data
test_fisher : For 2×2 tables with small expected frequencies
"""
# Convert to numpy array
if isinstance(observed, pd.DataFrame):
observed_array = observed.values
elif isinstance(observed, list):
observed_array = np.array(observed)
else:
observed_array = np.asarray(observed)
# Validate shape
if observed_array.shape != (2, 2):
raise ValueError(
f"McNemar's test requires a 2×2 table, got shape {observed_array.shape}"
)
# Extract cells
a, b = observed_array[0]
c, d = observed_array[1]
# Validate data types
if not all(
isinstance(x, (int, np.integer))
or (isinstance(x, (float, np.floating)) and x == int(x)) # type: ignore[unreachable]
for x in [a, b, c, d]
):
raise ValueError("All values must be non-negative integers (counts)")
if any(x < 0 for x in [a, b, c, d]):
raise ValueError("All counts must be non-negative")
a, b, c, d = int(a), int(b), int(c), int(d)
# Perform McNemar's test
# scipy.stats.contingency.mcnemar is not available in older scipy
# We'll compute manually
n_discordant = b + c
if n_discordant == 0:
# No discordant pairs - perfect agreement
statistic = 0.0
pvalue = 1.0
else:
if correction:
# With continuity correction
statistic = (abs(b - c) - 1.0) ** 2 / n_discordant
else:
# Without continuity correction
statistic = (b - c) ** 2 / n_discordant
# p-value from chi-square distribution with df=1
pvalue = 1.0 - stats.chi2.cdf(statistic, df=1)
# Compute odds ratio
odds_ratio = mcnemar_odds_ratio(b, c)
or_interpretation = interpret_mcnemar_or(odds_ratio)
# Variable names
var_before = var_before or "Before"
var_after = var_after or "After"
# Build result dictionary
result = {
"test_method": "McNemar's test",
"var_before": var_before,
"var_after": var_after,
"statistic": round(float(statistic), decimals),
"pvalue": round(float(pvalue), decimals + 1),
"df": 1,
"b": int(b), # Changed (0→1)
"c": int(c), # Changed (1→0)
"n_discordant": int(n_discordant),
"odds_ratio": (
round(float(odds_ratio), decimals)
if np.isfinite(odds_ratio)
else odds_ratio
),
"effect_size": (
round(float(odds_ratio), decimals)
if np.isfinite(odds_ratio)
else odds_ratio
),
"effect_size_metric": "Odds ratio",
"effect_size_interpretation": or_interpretation,
"correction": correction,
"alpha": alpha,
"significant": pvalue < alpha,
"stars": p2stars(pvalue),
"H0": "Marginal proportions are equal (no change)",
}
# Log results if verbose
if verbose:
logger.info(
f"McNemar: χ² = {statistic:.3f}, p = {pvalue:.4f} {p2stars(pvalue)}"
)
logger.info(f"OR = {odds_ratio:.3f}, {or_interpretation} (b={b}, c={c})")
# Auto-enable plotting if ax is provided
if ax is not None:
plot = True
# Generate plot if requested
if plot:
if ax is None:
fig, axes = stx.plt.subplots(1, 3, figsize=(12, 4))
_plot_mcnemar_full(observed_array, result, var_before, var_after, axes)
else:
_plot_mcnemar_simple(observed_array, result, var_before, var_after, ax)
# Convert to requested format
if return_as == "dataframe":
result = force_dataframe(result)
elif return_as not in ["dict", "dataframe"]:
return convert_results(result, return_as=return_as)
return result
def _plot_mcnemar_full(observed, result, var_before, var_after, axes):
"""Create 3-panel visualization for McNemar's test."""
a, b = observed[0]
c, d = observed[1]
# Panel 1: Contingency table heatmap
ax = axes[0]
im = ax.imshow(observed, cmap="Blues", aspect="auto")
# Add text annotations
for i in range(2):
for j in range(2):
ax.text(
j,
i,
int(observed[i, j]),
ha="center",
va="center",
color="black",
)
ax.set_xticks([0, 1])
ax.set_yticks([0, 1])
ax.set_xticklabels(["0", "1"])
ax.set_yticklabels(["0", "1"])
ax.set_xlabel(var_after)
ax.set_ylabel(var_before)
ax.set_title("Contingency Table")
stx.plt.colorbar(im, ax=ax)
# Panel 2: Discordant pairs comparison
ax = axes[1]
categories = ["0→1\n(b)", "1→0\n(c)"]
counts = [b, c]
bars = ax.bar(categories, counts)
# Add count labels
for bar, count in zip(bars, counts):
height = bar.get_height()
ax.text(
bar.get_x() + bar.get_width() / 2.0,
height,
f"{int(count)}",
ha="center",
va="bottom",
)
ax.set_ylabel("Count")
ax.set_title("Discordant Pairs")
ax.set_ylim(0, max(counts) * 1.2 if max(counts) > 0 else 1)
# Panel 3: McNemar's Test stats
ax = axes[2]
ax.axis("off")
ax.set_title("McNemar's Test")
# Add stats text box
stars_text = result["stars"].replace("ns", "$n$s")
if np.isfinite(result["odds_ratio"]):
text_str = (
f"$\\chi^2$ = {result['statistic']:.3f}\n"
f"$p$ = {result['pvalue']:.4f} {stars_text}\n"
f"$b$ = {result['b']}, $c$ = {result['c']}\n"
f"$OR$ = {result['odds_ratio']:.3f}"
)
else:
text_str = (
f"$\\chi^2$ = {result['statistic']:.3f}\n"
f"$p$ = {result['pvalue']:.4f} {stars_text}\n"
f"$b$ = {result['b']}, $c$ = {result['c']}"
)
ax.text(
0.5,
0.5,
text_str,
transform=ax.transAxes,
fontsize=10,
verticalalignment="center",
horizontalalignment="center",
color="black",
)
def _plot_mcnemar_simple(observed, result, var_before, var_after, ax):
"""Create simplified single-panel discordant pairs plot on given axes."""
a, b = observed[0]
c, d = observed[1]
# Discordant pairs comparison
categories = ["0→1\n(b)", "1→0\n(c)"]
counts = [b, c]
bars = ax.bar(categories, counts)
# Add count labels
for bar, count in zip(bars, counts):
height = bar.get_height()
ax.text(
bar.get_x() + bar.get_width() / 2.0,
height,
f"{int(count)}",
ha="center",
va="bottom",
)
ax.set_ylabel("Count")
ax.set_title("McNemar's Test")
ax.set_ylim(0, max(counts) * 1.2 if max(counts) > 0 else 1)
# Add stats text box
stars_text = result["stars"].replace("ns", "$n$s")
if np.isfinite(result["odds_ratio"]):
text_str = (
f"$\\chi^2$ = {result['statistic']:.3f}\n"
f"$p$ = {result['pvalue']:.4f} {stars_text}\n"
f"$b$ = {result['b']}, $c$ = {result['c']}\n"
f"$OR$ = {result['odds_ratio']:.3f}"
)
else:
text_str = (
f"$\\chi^2$ = {result['statistic']:.3f}\n"
f"$p$ = {result['pvalue']:.4f} {stars_text}\n"
f"$b$ = {result['b']}, $c$ = {result['c']}"
)
ax.text(
0.02,
0.98,
text_str,
transform=ax.transAxes,
verticalalignment="top",
color="black",
fontsize=6,
)
# Demo: python -m scitex_stats.tests.categorical._demo_mcnemar
# EOF