Source code for scitex_stats.tests.categorical._test_mcnemar

#!/usr/bin/env python3
# Timestamp: "2025-10-01 16:30:00 (ywatanabe)"
# File: scitex_stats/tests/categorical/_test_mcnemar.py
# ----------------------------------------

r"""McNemar's test for paired categorical data.

Functionalities:
  - McNemar's test for paired categorical data
  - Test for marginal homogeneity in 2×2 contingency tables
  - Compute odds ratio for matched pairs
  - Generate visualizations for paired outcomes

Dependencies:
  - packages: numpy, pandas, scipy, matplotlib

IO:
  - input: 2×2 contingency table (array or DataFrame)
  - output: Test results (dict or DataFrame) and optional figure
"""

from __future__ import annotations

import os
from typing import Literal, Optional, Union

import matplotlib.axes
import numpy as np
import pandas as pd
import matplotlib.pyplot as _mpl_plt  # noqa: E402
from scipy import stats

from scitex_stats._logging import getLogger
from scitex_stats._utils._formatters import p2stars
from scitex_stats._utils._normalizers import convert_results, force_dataframe

__FILE__ = __file__
__DIR__ = os.path.dirname(__FILE__)

logger = getLogger(__name__)


def mcnemar_odds_ratio(b: int, c: int) -> float:
    """
    Compute odds ratio for McNemar's test from discordant pairs.

    Parameters
    ----------
    b : int
        Count in cell [0,1] (success before, failure after)
    c : int
        Count in cell [1,0] (failure before, success after)

    Returns
    -------
    or_val : float
        Odds ratio = b / c

    Notes
    -----
    For McNemar's test, OR = b/c where b and c are the discordant pairs.
    OR > 1 indicates more b than c (positive change)
    OR < 1 indicates more c than b (negative change)
    OR = 1 indicates no change
    """
    if c == 0:
        if b == 0:
            return 1.0  # No discordant pairs
        return float("inf")  # Only b discordant pairs
    return float(b / c)


def interpret_mcnemar_or(or_val: float) -> str:
    """
    Interpret McNemar's odds ratio.

    Parameters
    ----------
    or_val : float
        Odds ratio

    Returns
    -------
    interpretation : str
        Interpretation
    """
    if or_val == 1.0:
        return "no change"
    elif or_val > 1.0:
        if or_val < 2.0:
            return "small increase"
        elif or_val < 4.0:
            return "medium increase"
        else:
            return "large increase"
    else:  # or_val < 1.0
        if or_val > 0.5:
            return "small decrease"
        elif or_val > 0.25:
            return "medium decrease"
        else:
            return "large decrease"


[docs] def test_mcnemar( # noqa: C901 observed: Union[np.ndarray, pd.DataFrame, list], var_before: Optional[str] = None, var_after: Optional[str] = None, correction: bool = True, alpha: float = 0.05, plot: bool = False, ax: Optional[matplotlib.axes.Axes] = None, return_as: Literal["dict", "dataframe"] = "dict", decimals: int = 3, verbose: bool = False, ) -> Union[dict, pd.DataFrame]: r""" Perform McNemar's test for paired categorical data. Tests whether there is a significant change in proportions for paired binary data. Appropriate for before-after studies with binary outcomes. Parameters ---------- observed : array-like, shape (2, 2) 2×2 contingency table: [[a, b], [c, d]] where: - a: both conditions negative (0,0) - b: before negative, after positive (0,1) - c: before positive, after negative (1,0) - d: both conditions positive (1,1) var_before : str, optional Name for before condition var_after : str, optional Name for after condition correction : bool, default True Whether to apply continuity correction (recommended for small samples) alpha : float, default 0.05 Significance level plot : bool, default False Whether to generate visualization ax : matplotlib.axes.Axes, optional Axes to plot on. If provided, plot is set to True return_as : {'dict', 'dataframe'}, default 'dict' Output format decimals : int, default 3 Number of decimal places for rounding verbose : bool, default False If True, print test results to logger Returns ------- result : dict or DataFrame Test results including: - test_method: Name of test - statistic: χ² statistic - pvalue: p-value - df: degrees of freedom (always 1) - b: count of (before=0, after=1) - c: count of (before=1, after=0) - odds_ratio: b / c - effect_size: odds ratio - effect_size_interpretation: interpretation - significant: whether to reject null hypothesis - stars: significance stars Notes ----- McNemar's test is used for paired nominal data, testing whether row and column marginal frequencies are equal (marginal homogeneity). The test statistic is based on the discordant pairs (b and c): .. math:: \\chi^2 = \\frac{(b - c)^2}{b + c} \\quad \\text{(without correction)} .. math:: \\chi^2 = \\frac{(|b - c| - 1)^2}{b + c} \\quad \\text{(with correction)} **Null hypothesis:** The marginal proportions are equal (no change) **Alternative:** The marginal proportions differ (significant change) **Assumptions:** - Paired data (matched observations) - Binary outcomes for both conditions - Large enough sample (b + c ≥ 10 recommended for chi-square approximation) **Effect size (Odds Ratio):** OR = b / c - OR = 1: no change - OR > 1: increase (more transitions from 0→1 than 1→0) - OR < 1: decrease (more transitions from 1→0 than 0→1) Examples -------- >>> import numpy as np >>> from scitex_stats.tests.categorical import test_mcnemar >>> >>> # Example: Treatment effectiveness (before/after) >>> # Rows: before, Columns: after >>> # [[no→no, no→yes], >>> # [yes→no, yes→yes]] >>> observed = [[59, 6], # 59 stayed negative, 6 improved ... [16, 19]] # 16 relapsed, 19 stayed positive >>> >>> result = test_mcnemar(observed, var_before='Before Treatment', ... var_after='After Treatment', plot=True) >>> print(f"χ² = {result['statistic']:.2f}, p = {result['pvalue']:.4f}") >>> print(f"Odds Ratio = {result['odds_ratio']:.2f}") References ---------- .. [1] McNemar, Q. (1947). "Note on the sampling error of the difference between correlated proportions or percentages". Psychometrika, 12(2), 153-157. .. [2] Edwards, A. L. (1948). "Note on the correction for continuity in testing the significance of the difference between correlated proportions". Psychometrika, 13(3), 185-187. See Also -------- test_chi2 : For independent (unpaired) categorical data test_fisher : For 2×2 tables with small expected frequencies """ # Convert to numpy array if isinstance(observed, pd.DataFrame): observed_array = observed.values elif isinstance(observed, list): observed_array = np.array(observed) else: observed_array = np.asarray(observed) # Validate shape if observed_array.shape != (2, 2): raise ValueError( f"McNemar's test requires a 2×2 table, got shape {observed_array.shape}" ) # Extract cells a, b = observed_array[0] c, d = observed_array[1] # Validate data types if not all( isinstance(x, (int, np.integer)) or (isinstance(x, (float, np.floating)) and x == int(x)) # type: ignore[unreachable] for x in [a, b, c, d] ): raise ValueError("All values must be non-negative integers (counts)") if any(x < 0 for x in [a, b, c, d]): raise ValueError("All counts must be non-negative") a, b, c, d = int(a), int(b), int(c), int(d) # Perform McNemar's test # scipy.stats.contingency.mcnemar is not available in older scipy # We'll compute manually n_discordant = b + c if n_discordant == 0: # No discordant pairs - perfect agreement statistic = 0.0 pvalue = 1.0 else: if correction: # With continuity correction statistic = (abs(b - c) - 1.0) ** 2 / n_discordant else: # Without continuity correction statistic = (b - c) ** 2 / n_discordant # p-value from chi-square distribution with df=1 pvalue = 1.0 - stats.chi2.cdf(statistic, df=1) # Compute odds ratio odds_ratio = mcnemar_odds_ratio(b, c) or_interpretation = interpret_mcnemar_or(odds_ratio) # Variable names var_before = var_before or "Before" var_after = var_after or "After" # Build result dictionary result = { "test_method": "McNemar's test", "var_before": var_before, "var_after": var_after, "statistic": round(float(statistic), decimals), "pvalue": round(float(pvalue), decimals + 1), "df": 1, "b": int(b), # Changed (0→1) "c": int(c), # Changed (1→0) "n_discordant": int(n_discordant), "odds_ratio": ( round(float(odds_ratio), decimals) if np.isfinite(odds_ratio) else odds_ratio ), "effect_size": ( round(float(odds_ratio), decimals) if np.isfinite(odds_ratio) else odds_ratio ), "effect_size_metric": "Odds ratio", "effect_size_interpretation": or_interpretation, "correction": correction, "alpha": alpha, "significant": pvalue < alpha, "stars": p2stars(pvalue), "H0": "Marginal proportions are equal (no change)", } # Log results if verbose if verbose: logger.info( f"McNemar: χ² = {statistic:.3f}, p = {pvalue:.4f} {p2stars(pvalue)}" ) logger.info(f"OR = {odds_ratio:.3f}, {or_interpretation} (b={b}, c={c})") # Auto-enable plotting if ax is provided if ax is not None: plot = True # Generate plot if requested if plot: if ax is None: fig, axes = _mpl_plt.subplots(1, 3, figsize=(12, 4)) _plot_mcnemar_full(observed_array, result, var_before, var_after, axes) else: _plot_mcnemar_simple(observed_array, result, var_before, var_after, ax) # Convert to requested format if return_as == "dataframe": result = force_dataframe(result) elif return_as not in ["dict", "dataframe"]: return convert_results(result, return_as=return_as) return result
def _plot_mcnemar_full(observed, result, var_before, var_after, axes): """Create 3-panel visualization for McNemar's test.""" a, b = observed[0] c, d = observed[1] # Panel 1: Contingency table heatmap ax = axes[0] im = ax.imshow(observed, cmap="Blues", aspect="auto") # Add text annotations for i in range(2): for j in range(2): ax.text( j, i, int(observed[i, j]), ha="center", va="center", color="black", ) ax.set_xticks([0, 1]) ax.set_yticks([0, 1]) ax.set_xticklabels(["0", "1"]) ax.set_yticklabels(["0", "1"]) ax.set_xlabel(var_after) ax.set_ylabel(var_before) ax.set_title("Contingency Table") _mpl_plt.colorbar(im, ax=ax) # Panel 2: Discordant pairs comparison ax = axes[1] categories = ["0→1\n(b)", "1→0\n(c)"] counts = [b, c] bars = ax.bar(categories, counts) # Add count labels for bar, count in zip(bars, counts): height = bar.get_height() ax.text( bar.get_x() + bar.get_width() / 2.0, height, f"{int(count)}", ha="center", va="bottom", ) ax.set_ylabel("Count") ax.set_title("Discordant Pairs") ax.set_ylim(0, max(counts) * 1.2 if max(counts) > 0 else 1) # Panel 3: McNemar's Test stats ax = axes[2] ax.axis("off") ax.set_title("McNemar's Test") # Add stats text box stars_text = result["stars"].replace("ns", "$n$s") if np.isfinite(result["odds_ratio"]): text_str = ( f"$\\chi^2$ = {result['statistic']:.3f}\n" f"$p$ = {result['pvalue']:.4f} {stars_text}\n" f"$b$ = {result['b']}, $c$ = {result['c']}\n" f"$OR$ = {result['odds_ratio']:.3f}" ) else: text_str = ( f"$\\chi^2$ = {result['statistic']:.3f}\n" f"$p$ = {result['pvalue']:.4f} {stars_text}\n" f"$b$ = {result['b']}, $c$ = {result['c']}" ) ax.text( 0.5, 0.5, text_str, transform=ax.transAxes, fontsize=10, verticalalignment="center", horizontalalignment="center", color="black", ) def _plot_mcnemar_simple(observed, result, var_before, var_after, ax): """Create simplified single-panel discordant pairs plot on given axes.""" a, b = observed[0] c, d = observed[1] # Discordant pairs comparison categories = ["0→1\n(b)", "1→0\n(c)"] counts = [b, c] bars = ax.bar(categories, counts) # Add count labels for bar, count in zip(bars, counts): height = bar.get_height() ax.text( bar.get_x() + bar.get_width() / 2.0, height, f"{int(count)}", ha="center", va="bottom", ) ax.set_ylabel("Count") ax.set_title("McNemar's Test") ax.set_ylim(0, max(counts) * 1.2 if max(counts) > 0 else 1) # Add stats text box stars_text = result["stars"].replace("ns", "$n$s") if np.isfinite(result["odds_ratio"]): text_str = ( f"$\\chi^2$ = {result['statistic']:.3f}\n" f"$p$ = {result['pvalue']:.4f} {stars_text}\n" f"$b$ = {result['b']}, $c$ = {result['c']}\n" f"$OR$ = {result['odds_ratio']:.3f}" ) else: text_str = ( f"$\\chi^2$ = {result['statistic']:.3f}\n" f"$p$ = {result['pvalue']:.4f} {stars_text}\n" f"$b$ = {result['b']}, $c$ = {result['c']}" ) ax.text( 0.02, 0.98, text_str, transform=ax.transAxes, verticalalignment="top", color="black", fontsize=6, ) # Demo: python -m scitex_stats.tests.categorical._demo_mcnemar # EOF