Source code for scitex_stats.tests.nonparametric._test_mannwhitneyu

#!/usr/bin/env python3
# Timestamp: "2025-10-01 17:45:00 (ywatanabe)"
# File: scitex_stats/tests/nonparametric/_test_mannwhitneyu.py

r"""
Mann-Whitney U test (Wilcoxon rank-sum test).

Functionalities:
  - Perform Mann-Whitney U test (Wilcoxon rank-sum test)
  - Non-parametric test for comparing two independent samples
  - Compute rank-biserial correlation effect size
  - Generate visualizations with rank distributions
  - Support flexible output formats (dict or DataFrame)

Dependencies:
  - packages: numpy, pandas, scipy, matplotlib

IO:
  - input: Two independent samples (arrays or Series)
  - output: Test results (dict or DataFrame) and optional figure
"""

from __future__ import annotations

import argparse
import os
from typing import Literal, Optional, Union

import matplotlib.axes
import numpy as np
import pandas as pd
from scipy import stats

import scitex as stx
from scitex_stats._logging import getLogger
from scitex_stats._utils._formatters import fmt_stat, fmt_sym

__FILE__ = __file__
__DIR__ = os.path.dirname(__FILE__)

logger = getLogger(__name__)


def _interpret_rank_biserial(r: float) -> str:
    """Map a rank-biserial correlation to a qualitative magnitude label.

    Only the absolute value is used, so the sign convention of ``r`` does
    not affect the label.
    """
    magnitude = abs(r)
    if magnitude < 0.1:
        return "negligible"
    if magnitude < 0.3:
        return "small"
    if magnitude < 0.5:
        return "medium"
    return "large"


def _clean_sample(values, name: str) -> np.ndarray:
    """Return ``values`` as a NaN-free 1-D array, or raise if it is unusable.

    Parameters
    ----------
    values : array-like
        Raw sample as passed by the caller (after column resolution).
    name : str
        Parameter name (``"x"`` or ``"y"``) used in error messages.

    Raises
    ------
    TypeError
        If ``values`` holds text (e.g. a column name passed without ``data=``).
    ValueError
        If no observations remain after dropping NaN.
    """
    arr = np.asarray(values)

    # A string here almost always means a column name was given without
    # ``data=``; fail with an actionable message instead of a ufunc error.
    if arr.dtype.kind in "US":
        raise TypeError(
            f"`{name}` must be numeric; got text data. "
            "Pass `data=` to resolve column names."
        )

    # np.isnan does not accept object arrays; coerce them to float so that
    # missing entries such as None become NaN and are dropped below.
    if arr.dtype == object:
        arr = arr.astype(float)

    # Boolean-mask indexing also flattens multi-dimensional input to 1-D.
    arr = arr[~np.isnan(arr)]

    if arr.size == 0:
        raise ValueError(
            f"`{name}` contains no valid (non-NaN) observations."
        )
    return arr


def test_mannwhitneyu(  # noqa: C901
    x: Union[np.ndarray, pd.Series, str],
    y: Union[np.ndarray, pd.Series, str],
    var_x: str = "x",
    var_y: str = "y",
    alternative: Literal["two-sided", "less", "greater"] = "two-sided",
    alpha: float = 0.05,
    plot: bool = False,
    ax: Optional[matplotlib.axes.Axes] = None,
    data: Union[pd.DataFrame, str, None] = None,
    return_as: Literal["dict", "dataframe"] = "dict",
    decimals: int = 3,
    verbose: bool = False,
) -> Union[dict, pd.DataFrame]:
    r"""
    Perform Mann-Whitney U test (Wilcoxon rank-sum test).

    Parameters
    ----------
    x, y : arrays or Series
        Two independent samples to compare. NaN values are removed before
        testing. Strings are accepted only together with ``data``.
    var_x, var_y : str
        Labels for samples
    alternative : {'two-sided', 'less', 'greater'}, default 'two-sided'
        Alternative hypothesis
    alpha : float, default 0.05
        Significance level
    plot : bool, default False
        Whether to generate visualization
    ax : matplotlib.axes.Axes, optional
        Axes object to plot on. If None and plot=True, creates new figure.
        If provided, automatically enables plotting.
    data : DataFrame, str, or None, optional
        DataFrame or CSV path. When provided, string values for x/y
        are resolved as column names (seaborn-style).
    return_as : {'dict', 'dataframe'}, default 'dict'
        Output format. Any other value is forwarded to ``convert_results``.
    decimals : int, default 3
        Number of decimal places for rounding
    verbose : bool, default False
        Whether to log test results

    Returns
    -------
    results : dict or DataFrame
        Test results including:
        - test_method: 'Mann-Whitney U test'
        - statistic: U-statistic value (rounded to ``decimals``)
        - stat_symbol: 'U'
        - n_x, n_y: Sample sizes after NaN removal
        - var_x, var_y: Variable labels
        - pvalue: p-value (rounded to ``decimals``)
        - stars: Significance stars
        - alpha: Significance level used
        - significant: Whether ``pvalue < alpha`` (unrounded p-value)
        - effect_size: Rank-biserial correlation
        - effect_size_metric: 'rank-biserial correlation'
        - effect_size_interpretation: Interpretation
        - H0: Null hypothesis description
        - recommendation: Plain-language summary of the outcome

    Raises
    ------
    TypeError
        If ``x`` or ``y`` is text data (e.g. a column name without ``data``).
    ValueError
        If ``x`` or ``y`` has no observations left after NaN removal.

    Notes
    -----
    The Mann-Whitney U test (also known as Wilcoxon rank-sum test) is a
    non-parametric test for comparing two independent samples.

    **Null Hypothesis (H0)**: The two samples come from distributions with
    equal medians (more precisely: P(X > Y) = 0.5)

    **Test Statistic U**:

    The reported statistic is the one returned by
    ``scipy.stats.mannwhitneyu``, which (SciPy >= 1.7) is the U statistic
    corresponding with sample ``x``:

    .. math::
        U = R_1 - \frac{n_1(n_1+1)}{2}

    Where:
    - n_1, n_2: Sample sizes
    - R_1: Sum of ranks for sample 1

    **Effect Size (Rank-biserial correlation)**:

    .. math::
        r = 1 - \frac{2U}{n_1 n_2}

    Or equivalently:

    .. math::
        r = \frac{2(\bar{R}_2 - \bar{R}_1)}{n_1 + n_2}

    Sign convention: because U refers to sample ``x``, ``r`` is negative
    when ``x`` tends to be larger than ``y`` and positive when ``x`` tends
    to be smaller.

    Interpretation:
    - |r| < 0.1: negligible
    - |r| < 0.3: small
    - |r| < 0.5: medium
    - |r| ≥ 0.5: large

    **Advantages**:
    - No normality assumption required
    - Robust to outliers
    - Works with ordinal data
    - More powerful than t-test for non-normal data

    **When to use**:
    - Comparing two independent groups
    - Data violate normality
    - Presence of outliers
    - Ordinal data (e.g., Likert scales)
    - Small sample sizes

    **Comparison with other tests**:
    - vs t-test: More robust, less powerful when assumptions met
    - vs Brunner-Munzel: MWU assumes identical shape, BM does not
    - vs KS test: MWU tests location, KS tests entire distribution

    **Note on relationship to Brunner-Munzel**:
    Mann-Whitney U assumes samples have the same distribution shape
    (differing only in location). For more robust analysis without this
    assumption, use test_brunner_munzel() instead.

    References
    ----------
    .. [1] Mann, H. B., & Whitney, D. R. (1947). "On a test of whether one of
           two random variables is stochastically larger than the other".
           Annals of Mathematical Statistics, 18(1), 50-60.
    .. [2] Kerby, D. S. (2014). "The simple difference formula: An approach to
           teaching nonparametric correlation". Comprehensive Psychology, 3, 11.

    Examples
    --------
    >>> # Basic usage
    >>> x = np.array([1, 2, 3, 4, 5])
    >>> y = np.array([3, 4, 5, 6, 7])
    >>> result = test_mannwhitneyu(x, y)
    >>> result['significant']
    False

    >>> # With auto-created figure
    >>> result = test_mannwhitneyu(x, y, plot=True)

    >>> # Plot on existing axes
    >>> import matplotlib.pyplot as plt
    >>> fig, ax = plt.subplots()
    >>> result = test_mannwhitneyu(x, y, ax=ax)

    >>> # With verbose output
    >>> result = test_mannwhitneyu(x, y, verbose=True)
    """
    # Resolve column names from DataFrame (seaborn-style data= parameter)
    if data is not None:
        from scitex_stats._utils._csv_support import resolve_columns

        resolved = resolve_columns(data, x=x, y=y)
        x, y = resolved["x"], resolved["y"]

    from scitex_stats._utils._formatters import p2stars
    from scitex_stats._utils._normalizers import convert_results, force_dataframe

    # Convert to numpy arrays, remove NaN, and reject empty/text samples
    # before they reach SciPy or the n_x * n_y division below.
    x = _clean_sample(x, "x")
    y = _clean_sample(y, "y")

    n_x = len(x)
    n_y = len(y)

    # Perform Mann-Whitney U test
    u_result = stats.mannwhitneyu(x, y, alternative=alternative)
    u_stat = float(u_result.statistic)
    pvalue = float(u_result.pvalue)

    # Determine rejection (uses the unrounded p-value)
    rejected = pvalue < alpha

    # Compute rank-biserial correlation effect size
    # Formula: r = 1 - (2U) / (n1 * n2), with U corresponding to sample x
    r = 1 - (2 * u_stat) / (n_x * n_y)

    # Interpret effect size
    effect_interp = _interpret_rank_biserial(r)

    # Compile results
    result = {
        "test_method": "Mann-Whitney U test",
        "statistic": round(u_stat, decimals),
        "stat_symbol": "U",
        "n_x": n_x,
        "n_y": n_y,
        "var_x": var_x,
        "var_y": var_y,
        "pvalue": round(pvalue, decimals),
        "stars": p2stars(pvalue),
        "alpha": alpha,
        "significant": rejected,
        "effect_size": round(r, decimals),
        "effect_size_metric": "rank-biserial correlation",
        "effect_size_interpretation": effect_interp,
        "H0": f"Distributions of {var_x} and {var_y} have equal medians",
    }

    # Add recommendation
    if rejected:
        result["recommendation"] = (
            f"{var_x} and {var_y} have significantly different medians."
        )
    else:
        result["recommendation"] = "No significant difference in medians detected."

    # Log results if verbose
    if verbose:
        logger.info(
            f"Mann-Whitney U: U = {u_stat:.3f}, p = {pvalue:.4f} {p2stars(pvalue)}"
        )
        logger.info(f"Rank-biserial r = {r:.3f} ({effect_interp})")

    # Auto-enable plotting if ax is provided
    if ax is not None:
        plot = True

    # Generate plot if requested
    if plot:
        if ax is None:
            _fig, ax = stx.plt.subplots()
        _plot_mannwhitneyu(x, y, var_x, var_y, result, ax)

    # Convert to requested format; anything other than the two built-in
    # formats is delegated to convert_results (e.g. file exports).
    if return_as == "dataframe":
        result = force_dataframe(result)
    elif return_as != "dict":
        return convert_results(result, return_as=return_as)

    return result


def _plot_mannwhitneyu(x, y, var_x, var_y, result, ax):
    """Create violin+swarm visualization on given axes.

    Parameters
    ----------
    x, y : array-like
        The two samples to draw (placed at positions 0 and 1).
    var_x, var_y : str
        Labels passed to the violin/swarm helper for the two groups.
    result : dict
        Test result; the keys ``stars``, ``statistic``, ``pvalue``, ``n_x``
        and ``n_y`` are read to annotate the plot.
    ax : matplotlib.axes.Axes
        Axes to draw on; its title is set to "Mann-Whitney U Test".
    """
    from scitex_stats._plot_helpers import (
        significance_bracket,
        stats_text_box,
        violin_swarm,
    )

    violin_swarm(ax, [x, y], [0, 1], [var_x, var_y])
    significance_bracket(ax, 0, 1, result["stars"], [x, y])
    stats_text_box(
        ax,
        [
            fmt_stat("U", result["statistic"]),
            fmt_stat("p", result["pvalue"], fmt=".4f", stars=result["stars"]),
            f"{fmt_sym('n_1')} = {result['n_x']}, {fmt_sym('n_2')} = {result['n_y']}",
        ],
    )
    ax.set_title("Mann-Whitney U Test")


def main(args):  # noqa: C901
    """Demonstrate Mann-Whitney U test functionality.

    Runs a fixed sequence of examples on seeded random data, saves one
    figure and exports the collected results to Excel and CSV in the
    current directory.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed command-line arguments (not read by the examples themselves).

    Returns
    -------
    int
        Exit status; always 0 when the examples complete.
    """
    logger.info("Demonstrating Mann-Whitney U test")

    # Set random seed
    np.random.seed(42)

    # Example 1: Basic usage
    logger.info("\n=== Example 1: Basic usage ===")

    x1 = np.random.normal(5, 1, 30)
    y1 = np.random.normal(6, 1, 30)

    result1 = test_mannwhitneyu(x1, y1, var_x="Group A", var_y="Group B", verbose=True)

    # Example 2: Non-normal data
    logger.info("\n=== Example 2: Non-normal (skewed) data ===")

    x2 = np.random.exponential(2, 40)
    y2 = np.random.exponential(3, 40)

    result2 = test_mannwhitneyu(
        x2, y2, var_x="Exp(λ=0.5)", var_y="Exp(λ=0.33)", verbose=True
    )

    # Example 3: With outliers
    logger.info("\n=== Example 3: Data with outliers ===")

    x3 = np.concatenate([np.random.normal(0, 1, 35), [10, 12]])
    y3 = np.random.normal(0.5, 1, 40)

    result3 = test_mannwhitneyu(
        x3, y3, var_x="With Outliers", var_y="Normal", verbose=True
    )

    logger.info("Mann-Whitney U is robust to outliers")

    # Example 4: Ordinal data (Likert scale)
    logger.info("\n=== Example 4: Ordinal data (Likert scale) ===")

    likert1 = np.random.choice(
        [1, 2, 3, 4, 5], size=50, p=[0.05, 0.15, 0.40, 0.30, 0.10]
    )
    likert2 = np.random.choice(
        [1, 2, 3, 4, 5], size=50, p=[0.05, 0.10, 0.25, 0.35, 0.25]
    )

    result4 = test_mannwhitneyu(
        likert1,
        likert2,
        var_x="Condition A",
        var_y="Condition B",
        verbose=True,
    )

    logger.info(f"Medians: {np.median(likert1):.1f} vs {np.median(likert2):.1f}")

    # Example 5: One-sided tests
    logger.info("\n=== Example 5: One-sided tests ===")

    x5 = np.random.normal(5, 1, 40)
    y5 = np.random.normal(6, 1, 40)

    logger.info("Two-sided:")
    test_mannwhitneyu(x5, y5, alternative="two-sided", verbose=True)

    logger.info("\nOne-sided (less):")
    test_mannwhitneyu(x5, y5, alternative="less", verbose=True)

    # Example 6: With visualization
    logger.info("\n=== Example 6: Complete analysis with visualization ===")

    x6 = np.random.gamma(2, 2, 50)
    y6 = np.random.gamma(3, 2, 50)

    result6 = test_mannwhitneyu(
        x6, y6, var_x="Gamma(k=2)", var_y="Gamma(k=3)", plot=True, verbose=True
    )
    stx.io.save(stx.plt.gcf(), "./mannwhitneyu_example6.jpg")
    stx.plt.close()

    # Example 7: Comparison with t-test
    logger.info("\n=== Example 7: Mann-Whitney U vs t-test ===")

    # Absolute import: this function runs when the file is executed as a
    # script (__name__ == "__main__"), where relative imports cannot resolve.
    from scitex_stats.tests.parametric._test_ttest import test_ttest_ind

    # Normal data - both tests should agree
    x_norm = np.random.normal(5, 1, 50)
    y_norm = np.random.normal(5.5, 1, 50)

    logger.info("Mann-Whitney U:")
    test_mannwhitneyu(x_norm, y_norm, verbose=True)
    logger.info("\nt-test:")
    test_ttest_ind(x_norm, y_norm, verbose=True)

    # Non-normal data - MWU more appropriate
    x_exp = np.random.exponential(2, 50)
    y_exp = np.random.exponential(2.5, 50)

    logger.info("\nFor exponential data:")
    logger.info("Mann-Whitney U:")
    test_mannwhitneyu(x_exp, y_exp, verbose=True)
    logger.info("\nt-test:")
    test_ttest_ind(x_exp, y_exp, verbose=True)
    logger.info("Mann-Whitney U is more reliable for non-normal data")

    # Example 8: Comparison with Brunner-Munzel
    logger.info("\n=== Example 8: Mann-Whitney U vs Brunner-Munzel ===")

    # Absolute import for the same script-execution reason as above.
    from scitex_stats.tests.nonparametric._test_brunner_munzel import (
        test_brunner_munzel,
    )

    # Same shape distributions
    x8 = np.random.normal(5, 1, 50)
    y8 = np.random.normal(6, 1, 50)

    mwu = test_mannwhitneyu(x8, y8)
    bm = test_brunner_munzel(x8, y8)

    logger.info("Same distribution shape:")
    logger.info(
        f"  Mann-Whitney U: p = {mwu['pvalue']:.4f}, r = {mwu['effect_size']:.3f}"
    )
    logger.info(
        f"  Brunner-Munzel: p = {bm['pvalue']:.4f}, P(X>Y) = {bm['effect_size']:.3f}"
    )

    # Different shapes
    x9 = np.random.normal(5, 1, 50)
    y9 = np.random.normal(6, 3, 50)  # Different variance

    mwu2 = test_mannwhitneyu(x9, y9)
    bm2 = test_brunner_munzel(x9, y9)

    logger.info("\nDifferent distribution shapes:")
    logger.info(f"  Mann-Whitney U: p = {mwu2['pvalue']:.4f}")
    logger.info(f"  Brunner-Munzel: p = {bm2['pvalue']:.4f}")
    logger.info("  Note: Brunner-Munzel is more appropriate for different shapes")

    # Example 9: Export results
    logger.info("\n=== Example 9: Export results ===")

    from scitex_stats._utils._normalizers import convert_results, force_dataframe

    test_results = [result1, result2, result3, result4, result6]

    df = force_dataframe(test_results)
    logger.info(f"\nDataFrame shape: {df.shape}")

    convert_results(test_results, return_as="excel", path="./mannwhitneyu_tests.xlsx")  # type: ignore[arg-type]
    logger.info("Results exported to Excel")

    convert_results(test_results, return_as="csv", path="./mannwhitneyu_tests.csv")  # type: ignore[arg-type]
    logger.info("Results exported to CSV")

    return 0


def parse_args():
    """Parse command line arguments.

    Returns
    -------
    argparse.Namespace
        Namespace with a single boolean attribute ``verbose`` (False unless
        ``--verbose`` is given).
    """
    parser = argparse.ArgumentParser(description="Demonstrate Mann-Whitney U test")
    parser.add_argument("--verbose", action="store_true", help="Enable verbose output")
    return parser.parse_args()


def run_main():
    """Initialize SciTeX framework and run main.

    Starts a ``stx.session`` (which hands back replacement stdout/stderr
    objects and a pyplot handle), runs :func:`main`, then closes the session
    with the exit status returned by ``main``.
    """
    import sys

    import matplotlib.pyplot as plt

    args = parse_args()

    # session.start returns the streams to use for the rest of the run;
    # they are assigned back onto ``sys`` here.
    CONFIG, sys.stdout, sys.stderr, plt, _CC, _rng_manager = stx.session.start(
        sys,
        plt,
        args=args,
        file=__file__,
        verbose=args.verbose,
        agg=True,
    )

    exit_status = main(args)

    stx.session.close(
        CONFIG,
        verbose=args.verbose,
        exit_status=exit_status,
    )


if __name__ == "__main__":
    run_main()

# EOF