Source code for scitex_stats.tests.nonparametric._test_wilcoxon

#!/usr/bin/env python3
# Timestamp: "2025-10-01 15:45:00 (ywatanabe)"
# File: scitex_stats/tests/nonparametric/_test_wilcoxon.py

r"""
Wilcoxon signed-rank test (non-parametric paired test).

Functionalities:
  - Perform Wilcoxon signed-rank test (non-parametric paired test)
  - Compute rank-biserial correlation effect size
  - Generate visualizations
  - Support flexible output formats (dict or DataFrame)

Dependencies:
  - packages: numpy, pandas, scipy, matplotlib

IO:
  - input: Two paired samples (arrays or Series)
  - output: Test results (dict or DataFrame) and optional figure
"""

from __future__ import annotations

import argparse
import os
from typing import Literal, Optional, Union

import matplotlib.axes
import numpy as np
import pandas as pd
from scipy import stats

import scitex as stx
from scitex_stats._logging import getLogger
from scitex_stats._utils._formatters import fmt_stat, fmt_sym

__FILE__ = __file__
__DIR__ = os.path.dirname(__FILE__)

logger = getLogger(__name__)


[docs] def test_wilcoxon( # noqa: C901 x: Union[np.ndarray, pd.Series, str], y: Union[np.ndarray, pd.Series, str], var_x: str = "before", var_y: str = "after", alternative: Literal["two-sided", "greater", "less"] = "two-sided", alpha: float = 0.05, plot: bool = False, ax: Optional[matplotlib.axes.Axes] = None, data: Union[pd.DataFrame, str, None] = None, return_as: Literal["dict", "dataframe"] = "dict", verbose: bool = False, ) -> Union[dict, pd.DataFrame]: r""" Perform Wilcoxon signed-rank test (non-parametric paired test). Parameters ---------- x : array or Series First sample (e.g., pre-test, baseline) y : array or Series Second sample (e.g., post-test, follow-up) Must have same length as x var_x : str, default 'before' Label for first sample var_y : str, default 'after' Label for second sample alternative : {'two-sided', 'greater', 'less'}, default 'two-sided' Alternative hypothesis: - 'two-sided': distributions differ - 'greater': x tends to be greater than y - 'less': x tends to be less than y alpha : float, default 0.05 Significance level plot : bool, default False Whether to generate visualization ax : matplotlib.axes.Axes, optional Axes object to plot on. If None and plot=True, creates new figure. If provided, automatically enables plotting. data : DataFrame, str, or None, optional DataFrame or CSV path. When provided, string values for x/y are resolved as column names (seaborn-style). return_as : {'dict', 'dataframe'}, default 'dict' Output format verbose : bool, default False Whether to print test results Returns ------- results : dict or DataFrame Test results including: - test_method: 'Wilcoxon signed-rank test' - statistic: W-statistic (sum of signed ranks) - pvalue: p-value - stars: Significance stars - significant: Whether null hypothesis is rejected - effect_size: rank-biserial correlation - effect_size_metric: 'rank-biserial correlation' - n_pairs: number of pairs (excluding zeros) - n_zeros: number of zero differences (ties) Notes ----- The Wilcoxon signed-rank test is the non-parametric alternative to the paired t-test. It tests whether the median of differences is zero. **When to use:** - Paired samples (before-after, matched pairs) - Data are not normally distributed - Ordinal data or continuous data with outliers - Robust alternative to paired t-test **Assumptions:** - Paired observations - Differences are symmetric around the median - Ordinal or continuous data **How it works:** 1. Compute differences: d = x - y 2. Remove zero differences 3. Rank absolute differences 4. Sum ranks of positive differences (W+) 5. Sum ranks of negative differences (W-) 6. Test statistic: W = min(W+, W-) **Effect size** (rank-biserial correlation): .. math:: r = \frac{W_+ - W_-}{n(n+1)/2} Ranges from -1 to 1: - r close to 1: x > y (large positive effect) - r close to 0: no difference - r close to -1: x < y (large negative effect) Interpretation: - |r| < 0.1: negligible - |r| < 0.3: small - |r| < 0.5: medium - |r| ≥ 0.5: large References ---------- .. [1] Wilcoxon, F. (1945). "Individual comparisons by ranking methods". Biometrics Bulletin, 1(6), 80-83. .. [2] Kerby, D. S. (2014). "The simple difference formula: An approach to teaching nonparametric correlation". Comprehensive Psychology, 3, 11. Examples -------- >>> before = np.array([10, 12, 15, 18, 20]) >>> after = np.array([12, 14, 17, 20, 22]) >>> result = test_wilcoxon(before, after) >>> result['pvalue'] 0.062... >>> # With visualization >>> result, fig = test_wilcoxon(before, after, plot=True) """ # Resolve column names from DataFrame (seaborn-style data= parameter) if data is not None: from scitex_stats._utils._csv_support import resolve_columns resolved = resolve_columns(data, x=x, y=y) x, y = resolved["x"], resolved["y"] from scitex_stats._utils._formatters import p2stars from scitex_stats._utils._normalizers import force_dataframe # Convert to numpy arrays and remove NaN x = np.asarray(x) y = np.asarray(y) # Check for paired NaN removal valid_mask = ~(np.isnan(x) | np.isnan(y)) x = x[valid_mask] y = y[valid_mask] if len(x) != len(y): raise ValueError(f"Paired samples must have same length: {len(x)} vs {len(y)}") # Compute differences diff = x - y # Count zeros (ties) n_zeros = np.sum(diff == 0) # Perform Wilcoxon signed-rank test w_result = stats.wilcoxon(x, y, alternative=alternative, zero_method="wilcox") w_stat = float(w_result.statistic) pvalue = float(w_result.pvalue) # Compute rank-biserial correlation (effect size) # Remove zeros for ranking diff_nonzero = diff[diff != 0] n_nonzero = len(diff_nonzero) if n_nonzero > 0: # Rank absolute differences abs_diff = np.abs(diff_nonzero) ranks = stats.rankdata(abs_diff) # Sum of ranks for positive and negative differences w_plus = np.sum(ranks[diff_nonzero > 0]) w_minus = np.sum(ranks[diff_nonzero < 0]) # Rank-biserial correlation max_sum = n_nonzero * (n_nonzero + 1) / 2 r = (w_plus - w_minus) / max_sum effect_size = float(r) else: effect_size = 0.0 # Interpret effect size effect_size_abs = abs(effect_size) if effect_size_abs < 0.1: effect_size_interpretation = "negligible" elif effect_size_abs < 0.3: effect_size_interpretation = "small" elif effect_size_abs < 0.5: effect_size_interpretation = "medium" else: effect_size_interpretation = "large" # Create null hypothesis description if alternative == "two-sided": H0 = f"median({var_x} - {var_y}) = 0" elif alternative == "greater": H0 = f"median({var_x} - {var_y}) ≤ 0" else: # less H0 = f"median({var_x} - {var_y}) ≥ 0" # Compile results result = { "test_method": "Wilcoxon signed-rank test", "statistic": w_stat, "stat_symbol": "W", "alternative": alternative, "n_pairs": n_nonzero, "n_zeros": n_zeros, "var_x": var_x, "var_y": var_y, "pvalue": pvalue, "stars": p2stars(pvalue), "alpha": alpha, "significant": pvalue < alpha, "effect_size": effect_size, "effect_size_metric": "rank-biserial correlation", "effect_size_interpretation": effect_size_interpretation, "H0": H0, } # Log results if verbose if verbose: logger.info(f"Wilcoxon: W = {w_stat:.3f}, p = {pvalue:.4f} {p2stars(pvalue)}") logger.info( f"Rank-biserial r = {effect_size:.3f} ({effect_size_interpretation})" ) # Auto-enable plotting if ax is provided if ax is not None: plot = True # Generate plot if requested if plot: if ax is None: _fig, ax = stx.plt.subplots() _plot_wilcoxon(x, y, var_x, var_y, result, ax) # Convert to requested format if return_as == "dataframe": result = force_dataframe(result) return result
def _plot_wilcoxon(x, y, var_x, var_y, result, ax): """Create violin+swarm visualization on given axes.""" from scitex_stats._plot_helpers import ( significance_bracket, stats_text_box, violin_swarm, ) violin_swarm(ax, [x, y], [0, 1], [var_x, var_y]) significance_bracket(ax, 0, 1, result["stars"], [x, y]) stats_text_box( ax, [ fmt_stat("W", result["statistic"]), fmt_stat("p", result["pvalue"], fmt=".4f", stars=result["stars"]), fmt_stat("r", result["effect_size"]), f"{fmt_sym('n')} = {result['n_pairs']}", ], ) ax.set_title("Wilcoxon Signed-Rank Test") def main(args): # noqa: C901 """Demonstrate Wilcoxon signed-rank test functionality.""" logger.info("Demonstrating Wilcoxon signed-rank test") # Set random seed np.random.seed(42) # Example 1: Normal data (compare with paired t-test) logger.info("\n=== Example 1: Normal data ===") before1 = np.random.normal(10, 2, 30) after1 = before1 + np.random.normal(2, 1, 30) result1 = test_wilcoxon( before1, after1, var_x="Before", var_y="After", verbose=True ) logger.info(f"W = {result1['statistic']:.0f}") logger.info(f"p = {result1['pvalue']:.4f} {result1['stars']}") logger.info( f"Effect size (r) = {result1['effect_size']:.3f} ({result1['effect_size_interpretation']})" ) # Example 2: Skewed data logger.info("\n=== Example 2: Skewed data ===") before2 = np.random.exponential(2, 30) after2 = before2 + np.random.exponential(1, 30) result2 = test_wilcoxon(before2, after2, verbose=True) logger.info(f"W = {result2['statistic']:.0f}") logger.info(f"p = {result2['pvalue']:.4f} {result2['stars']}") logger.info(f"Effect size (r) = {result2['effect_size']:.3f}") # Example 3: Data with outliers logger.info("\n=== Example 3: Data with outliers ===") before3 = np.concatenate([np.random.normal(10, 1, 28), [20, 25]]) # Add outliers after3 = np.concatenate([np.random.normal(12, 1, 28), [22, 27]]) result3 = test_wilcoxon(before3, after3, var_x="Pre", var_y="Post", verbose=True) logger.info(f"W = {result3['statistic']:.0f}") logger.info(f"p = {result3['pvalue']:.4f} {result3['stars']}") logger.info("Wilcoxon is robust to outliers") # Example 4: Small effect logger.info("\n=== Example 4: Small effect (borderline) ===") before4 = np.random.normal(10, 2, 25) after4 = before4 + np.random.normal(0.3, 1, 25) # Small change result4 = test_wilcoxon(before4, after4, verbose=True) logger.info(f"W = {result4['statistic']:.0f}") logger.info(f"p = {result4['pvalue']:.4f} {result4['stars']}") logger.info(f"Effect size (r) = {result4['effect_size']:.3f}") # Example 5: One-sided test logger.info("\n=== Example 5: One-sided test ===") before5 = np.random.normal(10, 2, 30) after5 = before5 + np.random.normal(1.5, 1, 30) result_two = test_wilcoxon(before5, after5, alternative="two-sided", verbose=True) result_less = test_wilcoxon(before5, after5, alternative="less", verbose=True) logger.info(f"Two-sided: p = {result_two['pvalue']:.4f} {result_two['stars']}") logger.info( f"One-sided (less): p = {result_less['pvalue']:.4f} {result_less['stars']}" ) # Example 6: With visualization (demonstrates plt.gcf() and stx.io.save()) logger.info("\n=== Example 6: With visualization ===") before6 = np.random.lognormal(2, 0.5, 40) after6 = before6 * np.random.lognormal(0.15, 0.3, 40) test_wilcoxon( before6, after6, var_x="Baseline", var_y="Treatment", plot=True, verbose=True, ) # Save the figure using plt.gcf() stx.io.save(stx.plt.gcf(), "./wilcoxon_demo.jpg") stx.plt.close() logger.info("Figure saved to wilcoxon_demo.jpg") # Example 7: Compare with paired t-test logger.info("\n=== Example 7: Wilcoxon vs Paired t-test ===") # For normal data normal_before = np.random.normal(10, 2, 50) normal_after = normal_before + np.random.normal(1, 1.5, 50) from ..parametric._test_ttest import test_ttest_rel result_wilcox = test_wilcoxon(normal_before, normal_after, verbose=True) result_ttest = test_ttest_rel(normal_before, normal_after) logger.info( f"Wilcoxon: p = {result_wilcox['pvalue']:.4f} {result_wilcox['stars']}, r = {result_wilcox['effect_size']:.3f}" ) logger.info( f"Paired t-test: p = {result_ttest['pvalue']:.4f} {result_ttest['stars']}, d = {result_ttest['effect_size']:.3f}" ) logger.info("For normal data, both tests should agree") # Example 8: Export results logger.info("\n=== Example 8: Multiple comparisons ===") from scitex_stats._utils._normalizers import combine_results, export_summary results_list = [] for i in range(5): before = np.random.exponential(2, 30) after = before + np.random.exponential(0.5, 30) result = test_wilcoxon(before, after, var_x=f"Pre_{i}", var_y=f"Post_{i}") results_list.append(result) df_all = combine_results(results_list) logger.info(f"\n{df_all[['var_x', 'var_y', 'pvalue', 'stars', 'effect_size']]}") export_summary(df_all, "./wilcoxon_results.csv") logger.info("Results exported") return 0 def parse_args(): """Parse command line arguments.""" parser = argparse.ArgumentParser( description="Demonstrate Wilcoxon signed-rank test" ) parser.add_argument("--verbose", action="store_true", help="Enable verbose output") return parser.parse_args() def run_main(): """Initialize SciTeX framework and run main.""" import sys import matplotlib.pyplot as plt args = parse_args() CONFIG, sys.stdout, sys.stderr, plt, _CC, _rng_manager = stx.session.start( sys, plt, args=args, file=__file__, verbose=args.verbose, agg=True, ) exit_status = main(args) stx.session.close( CONFIG, verbose=args.verbose, exit_status=exit_status, ) if __name__ == "__main__": run_main() # EOF