Source code for scitex_stats.tests.nonparametric._test_wilcoxon

#!/usr/bin/env python3
# Timestamp: "2025-10-01 15:45:00 (ywatanabe)"
# File: scitex_stats/tests/nonparametric/_test_wilcoxon.py

r"""
Wilcoxon signed-rank test (non-parametric paired test).

Functionalities:
  - Perform Wilcoxon signed-rank test (non-parametric paired test)
  - Compute rank-biserial correlation effect size
  - Generate visualizations
  - Support flexible output formats (dict or DataFrame)

Dependencies:
  - packages: numpy, pandas, scipy, matplotlib

IO:
  - input: Two paired samples (arrays or Series)
  - output: Test results (dict or DataFrame) and optional figure
"""

from __future__ import annotations

import argparse
import os
from typing import Literal, Optional, Union

import matplotlib.axes
import numpy as np
import pandas as pd
from scipy import stats

import matplotlib.pyplot as _mpl_plt  # noqa: E402
from scitex_stats._logging import getLogger
from scitex_stats._utils._formatters import fmt_stat, fmt_sym

__FILE__ = __file__
__DIR__ = os.path.dirname(__FILE__)

logger = getLogger(__name__)


[docs] def test_wilcoxon( # noqa: C901 x: Union[np.ndarray, pd.Series, str], y: Union[np.ndarray, pd.Series, str], var_x: str = "before", var_y: str = "after", alternative: Literal["two-sided", "greater", "less"] = "two-sided", alpha: float = 0.05, plot: bool = False, ax: Optional[matplotlib.axes.Axes] = None, data: Union[pd.DataFrame, str, None] = None, return_as: Literal["dict", "dataframe"] = "dict", verbose: bool = False, ) -> Union[dict, pd.DataFrame]: r""" Perform Wilcoxon signed-rank test (non-parametric paired test). Parameters ---------- x : array or Series First sample (e.g., pre-test, baseline) y : array or Series Second sample (e.g., post-test, follow-up) Must have same length as x var_x : str, default 'before' Label for first sample var_y : str, default 'after' Label for second sample alternative : {'two-sided', 'greater', 'less'}, default 'two-sided' Alternative hypothesis: - 'two-sided': distributions differ - 'greater': x tends to be greater than y - 'less': x tends to be less than y alpha : float, default 0.05 Significance level plot : bool, default False Whether to generate visualization ax : matplotlib.axes.Axes, optional Axes object to plot on. If None and plot=True, creates new figure. If provided, automatically enables plotting. data : DataFrame, str, or None, optional DataFrame or CSV path. When provided, string values for x/y are resolved as column names (seaborn-style). return_as : {'dict', 'dataframe'}, default 'dict' Output format verbose : bool, default False Whether to print test results Returns ------- results : dict or DataFrame Test results including: - test_method: 'Wilcoxon signed-rank test' - statistic: W-statistic (sum of signed ranks) - pvalue: p-value - stars: Significance stars - significant: Whether null hypothesis is rejected - effect_size: rank-biserial correlation - effect_size_metric: 'rank-biserial correlation' - n_pairs: number of pairs (excluding zeros) - n_zeros: number of zero differences (ties) Notes ----- The Wilcoxon signed-rank test is the non-parametric alternative to the paired t-test. It tests whether the median of differences is zero. **When to use:** - Paired samples (before-after, matched pairs) - Data are not normally distributed - Ordinal data or continuous data with outliers - Robust alternative to paired t-test **Assumptions:** - Paired observations - Differences are symmetric around the median - Ordinal or continuous data **How it works:** 1. Compute differences: d = x - y 2. Remove zero differences 3. Rank absolute differences 4. Sum ranks of positive differences (W+) 5. Sum ranks of negative differences (W-) 6. Test statistic: W = min(W+, W-) **Effect size** (rank-biserial correlation): .. math:: r = \frac{W_+ - W_-}{n(n+1)/2} Ranges from -1 to 1: - r close to 1: x > y (large positive effect) - r close to 0: no difference - r close to -1: x < y (large negative effect) Interpretation: - |r| < 0.1: negligible - |r| < 0.3: small - |r| < 0.5: medium - |r| ≥ 0.5: large References ---------- .. [1] Wilcoxon, F. (1945). "Individual comparisons by ranking methods". Biometrics Bulletin, 1(6), 80-83. .. [2] Kerby, D. S. (2014). "The simple difference formula: An approach to teaching nonparametric correlation". Comprehensive Psychology, 3, 11. Examples -------- >>> before = np.array([10, 12, 15, 18, 20]) >>> after = np.array([12, 14, 17, 20, 22]) >>> result = test_wilcoxon(before, after) >>> result['pvalue'] 0.062... >>> # With visualization >>> result, fig = test_wilcoxon(before, after, plot=True) """ # Resolve column names from DataFrame (seaborn-style data= parameter) if data is not None: from scitex_stats._utils._csv_support import resolve_columns resolved = resolve_columns(data, x=x, y=y) x, y = resolved["x"], resolved["y"] from scitex_stats._utils._formatters import p2stars from scitex_stats._utils._normalizers import force_dataframe # Convert to numpy arrays and remove NaN x = np.asarray(x) y = np.asarray(y) # Check for paired NaN removal valid_mask = ~(np.isnan(x) | np.isnan(y)) x = x[valid_mask] y = y[valid_mask] if len(x) != len(y): raise ValueError(f"Paired samples must have same length: {len(x)} vs {len(y)}") # Compute differences diff = x - y # Count zeros (ties) n_zeros = np.sum(diff == 0) # Perform Wilcoxon signed-rank test w_result = stats.wilcoxon(x, y, alternative=alternative, zero_method="wilcox") w_stat = float(w_result.statistic) pvalue = float(w_result.pvalue) # Compute rank-biserial correlation (effect size) # Remove zeros for ranking diff_nonzero = diff[diff != 0] n_nonzero = len(diff_nonzero) if n_nonzero > 0: # Rank absolute differences abs_diff = np.abs(diff_nonzero) ranks = stats.rankdata(abs_diff) # Sum of ranks for positive and negative differences w_plus = np.sum(ranks[diff_nonzero > 0]) w_minus = np.sum(ranks[diff_nonzero < 0]) # Rank-biserial correlation max_sum = n_nonzero * (n_nonzero + 1) / 2 r = (w_plus - w_minus) / max_sum effect_size = float(r) else: effect_size = 0.0 # Interpret effect size effect_size_abs = abs(effect_size) if effect_size_abs < 0.1: effect_size_interpretation = "negligible" elif effect_size_abs < 0.3: effect_size_interpretation = "small" elif effect_size_abs < 0.5: effect_size_interpretation = "medium" else: effect_size_interpretation = "large" # Create null hypothesis description if alternative == "two-sided": H0 = f"median({var_x} - {var_y}) = 0" elif alternative == "greater": H0 = f"median({var_x} - {var_y}) ≤ 0" else: # less H0 = f"median({var_x} - {var_y}) ≥ 0" # Compile results result = { "test_method": "Wilcoxon signed-rank test", "statistic": w_stat, "stat_symbol": "W", "alternative": alternative, "n_pairs": n_nonzero, "n_zeros": n_zeros, "var_x": var_x, "var_y": var_y, "pvalue": pvalue, "stars": p2stars(pvalue), "alpha": alpha, "significant": pvalue < alpha, "effect_size": effect_size, "effect_size_metric": "rank-biserial correlation", "effect_size_interpretation": effect_size_interpretation, "H0": H0, } # Log results if verbose if verbose: logger.info(f"Wilcoxon: W = {w_stat:.3f}, p = {pvalue:.4f} {p2stars(pvalue)}") logger.info( f"Rank-biserial r = {effect_size:.3f} ({effect_size_interpretation})" ) # Auto-enable plotting if ax is provided if ax is not None: plot = True # Generate plot if requested if plot: if ax is None: _fig, ax = _mpl_plt.subplots() _plot_wilcoxon(x, y, var_x, var_y, result, ax) # Convert to requested format if return_as == "dataframe": result = force_dataframe(result) return result
def _plot_wilcoxon(x, y, var_x, var_y, result, ax): """Create violin+swarm visualization on given axes.""" from scitex_stats._plot_helpers import ( significance_bracket, stats_text_box, violin_swarm, ) violin_swarm(ax, [x, y], [0, 1], [var_x, var_y]) significance_bracket(ax, 0, 1, result["stars"], [x, y]) stats_text_box( ax, [ fmt_stat("W", result["statistic"]), fmt_stat("p", result["pvalue"], fmt=".4f", stars=result["stars"]), fmt_stat("r", result["effect_size"]), f"{fmt_sym('n')} = {result['n_pairs']}", ], ) ax.set_title("Wilcoxon Signed-Rank Test") def main(args): # noqa: C901 """Demonstrate Wilcoxon signed-rank test functionality.""" import scitex as stx logger.info("Demonstrating Wilcoxon signed-rank test") # Set random seed np.random.seed(42) # Example 1: Normal data (compare with paired t-test) logger.info("\n=== Example 1: Normal data ===") before1 = np.random.normal(10, 2, 30) after1 = before1 + np.random.normal(2, 1, 30) result1 = test_wilcoxon( before1, after1, var_x="Before", var_y="After", verbose=True ) logger.info(f"W = {result1['statistic']:.0f}") logger.info(f"p = {result1['pvalue']:.4f} {result1['stars']}") logger.info( f"Effect size (r) = {result1['effect_size']:.3f} ({result1['effect_size_interpretation']})" ) # Example 2: Skewed data logger.info("\n=== Example 2: Skewed data ===") before2 = np.random.exponential(2, 30) after2 = before2 + np.random.exponential(1, 30) result2 = test_wilcoxon(before2, after2, verbose=True) logger.info(f"W = {result2['statistic']:.0f}") logger.info(f"p = {result2['pvalue']:.4f} {result2['stars']}") logger.info(f"Effect size (r) = {result2['effect_size']:.3f}") # Example 3: Data with outliers logger.info("\n=== Example 3: Data with outliers ===") before3 = np.concatenate([np.random.normal(10, 1, 28), [20, 25]]) # Add outliers after3 = np.concatenate([np.random.normal(12, 1, 28), [22, 27]]) result3 = test_wilcoxon(before3, after3, var_x="Pre", var_y="Post", verbose=True) logger.info(f"W = {result3['statistic']:.0f}") logger.info(f"p = {result3['pvalue']:.4f} {result3['stars']}") logger.info("Wilcoxon is robust to outliers") # Example 4: Small effect logger.info("\n=== Example 4: Small effect (borderline) ===") before4 = np.random.normal(10, 2, 25) after4 = before4 + np.random.normal(0.3, 1, 25) # Small change result4 = test_wilcoxon(before4, after4, verbose=True) logger.info(f"W = {result4['statistic']:.0f}") logger.info(f"p = {result4['pvalue']:.4f} {result4['stars']}") logger.info(f"Effect size (r) = {result4['effect_size']:.3f}") # Example 5: One-sided test logger.info("\n=== Example 5: One-sided test ===") before5 = np.random.normal(10, 2, 30) after5 = before5 + np.random.normal(1.5, 1, 30) result_two = test_wilcoxon(before5, after5, alternative="two-sided", verbose=True) result_less = test_wilcoxon(before5, after5, alternative="less", verbose=True) logger.info(f"Two-sided: p = {result_two['pvalue']:.4f} {result_two['stars']}") logger.info( f"One-sided (less): p = {result_less['pvalue']:.4f} {result_less['stars']}" ) # Example 6: With visualization (demonstrates plt.gcf() and stx.io.save()) logger.info("\n=== Example 6: With visualization ===") before6 = np.random.lognormal(2, 0.5, 40) after6 = before6 * np.random.lognormal(0.15, 0.3, 40) test_wilcoxon( before6, after6, var_x="Baseline", var_y="Treatment", plot=True, verbose=True, ) # Save the figure using plt.gcf() stx.io.save(_mpl_plt.gcf(), "./wilcoxon_demo.jpg") _mpl_plt.close() logger.info("Figure saved to wilcoxon_demo.jpg") # Example 7: Compare with paired t-test logger.info("\n=== Example 7: Wilcoxon vs Paired t-test ===") # For normal data normal_before = np.random.normal(10, 2, 50) normal_after = normal_before + np.random.normal(1, 1.5, 50) from ..parametric._test_ttest import test_ttest_rel result_wilcox = test_wilcoxon(normal_before, normal_after, verbose=True) result_ttest = test_ttest_rel(normal_before, normal_after) logger.info( f"Wilcoxon: p = {result_wilcox['pvalue']:.4f} {result_wilcox['stars']}, r = {result_wilcox['effect_size']:.3f}" ) logger.info( f"Paired t-test: p = {result_ttest['pvalue']:.4f} {result_ttest['stars']}, d = {result_ttest['effect_size']:.3f}" ) logger.info("For normal data, both tests should agree") # Example 8: Export results logger.info("\n=== Example 8: Multiple comparisons ===") from scitex_stats._utils._normalizers import combine_results, export_summary results_list = [] for i in range(5): before = np.random.exponential(2, 30) after = before + np.random.exponential(0.5, 30) result = test_wilcoxon(before, after, var_x=f"Pre_{i}", var_y=f"Post_{i}") results_list.append(result) df_all = combine_results(results_list) logger.info(f"\n{df_all[['var_x', 'var_y', 'pvalue', 'stars', 'effect_size']]}") export_summary(df_all, "./wilcoxon_results.csv") logger.info("Results exported") return 0 def parse_args(): """Parse command line arguments.""" parser = argparse.ArgumentParser( description="Demonstrate Wilcoxon signed-rank test" ) parser.add_argument("--verbose", action="store_true", help="Enable verbose output") return parser.parse_args() def run_main(): """Initialize SciTeX framework and run main.""" import scitex as stx import sys import matplotlib.pyplot as plt args = parse_args() CONFIG, sys.stdout, sys.stderr, plt, _CC, _rng_manager = stx.session.start( sys, plt, args=args, file=__file__, verbose=args.verbose, agg=True, ) exit_status = main(args) stx.session.close( CONFIG, verbose=args.verbose, exit_status=exit_status, ) if __name__ == "__main__": run_main() # EOF