#!/usr/bin/env python3
# Timestamp: "2025-10-01 17:45:00 (ywatanabe)"
# File: scitex_stats/tests/nonparametric/_test_mannwhitneyu.py
r"""
Mann-Whitney U test (Wilcoxon rank-sum test).
Functionalities:
- Perform Mann-Whitney U test (Wilcoxon rank-sum test)
- Non-parametric test for comparing two independent samples
- Compute rank-biserial correlation effect size
- Generate violin + swarm visualizations of the two samples
- Support flexible output formats (dict or DataFrame)
Dependencies:
- packages: numpy, pandas, scipy, matplotlib
IO:
- input: Two independent samples (arrays or Series)
- output: Test results (dict or DataFrame) and optional figure
"""
from __future__ import annotations
import argparse
import os
from typing import Literal, Optional, Union
import matplotlib.axes
import matplotlib.pyplot as _mpl_plt # noqa: E402
import numpy as np
import pandas as pd
from scipy import stats
from scitex_stats._logging import getLogger
from scitex_stats._utils._formatters import fmt_stat, fmt_sym
# Path of this module as reported by the interpreter.
__FILE__ = __file__
# Directory that contains this module.
__DIR__ = os.path.dirname(__FILE__)
# Module-level logger, named after this module, from scitex_stats._logging.
logger = getLogger(__name__)
[docs]
def test_mannwhitneyu(  # noqa: C901
    x: Union[np.ndarray, pd.Series, str],
    y: Union[np.ndarray, pd.Series, str],
    var_x: str = "x",
    var_y: str = "y",
    alternative: Literal["two-sided", "less", "greater"] = "two-sided",
    alpha: float = 0.05,
    plot: bool = False,
    ax: Optional[matplotlib.axes.Axes] = None,
    data: Union[pd.DataFrame, str, None] = None,
    return_as: Literal["dict", "dataframe"] = "dict",
    decimals: int = 3,
    verbose: bool = False,
) -> Union[dict, pd.DataFrame]:
    r"""
    Perform Mann-Whitney U test (Wilcoxon rank-sum test).

    Parameters
    ----------
    x, y : arrays or Series
        Two independent samples to compare. NaN values are dropped before
        testing. When ``data`` is given, strings are resolved as column
        names.
    var_x, var_y : str
        Labels for samples
    alternative : {'two-sided', 'less', 'greater'}, default 'two-sided'
        Alternative hypothesis (passed to ``scipy.stats.mannwhitneyu``)
    alpha : float, default 0.05
        Significance level
    plot : bool, default False
        Whether to generate visualization
    ax : matplotlib.axes.Axes, optional
        Axes object to plot on. If None and plot=True, creates new figure.
        If provided, automatically enables plotting.
    data : DataFrame, str, or None, optional
        DataFrame or CSV path. When provided, string values for x/y
        are resolved as column names (seaborn-style).
    return_as : {'dict', 'dataframe'}, default 'dict'
        Output format. Any other value is forwarded to ``convert_results``.
    decimals : int, default 3
        Number of decimal places used to round the statistic, p-value and
        effect size in the returned results
    verbose : bool, default False
        Whether to log test results

    Returns
    -------
    results : dict or DataFrame
        Test results including:

        - test_method: 'Mann-Whitney U test'
        - statistic: U-statistic value (rounded)
        - stat_symbol: 'U'
        - n_x, n_y: Sample sizes after NaN removal
        - var_x, var_y: Variable labels
        - pvalue: p-value (rounded)
        - stars: Significance stars
        - alpha: Significance level used
        - significant: Whether null hypothesis is rejected (p < alpha,
          evaluated on the unrounded p-value)
        - effect_size: Rank-biserial correlation (rounded)
        - effect_size_metric: 'rank-biserial correlation'
        - effect_size_interpretation: Interpretation
        - H0: Null hypothesis description
        - recommendation: Short textual summary of the outcome

    Raises
    ------
    ValueError
        If either sample has no values left after NaN removal.

    Notes
    -----
    The Mann-Whitney U test (also known as Wilcoxon rank-sum test) is a
    non-parametric test for comparing two independent samples.

    **Null Hypothesis (H0)**: The two samples come from distributions with
    equal medians (more precisely: P(X > Y) = 0.5)

    **Test Statistic U**: the reported statistic is the one returned by
    ``scipy.stats.mannwhitneyu``, which (SciPy >= 1.7) is the U statistic
    of sample ``x``:

    .. math::
        U_1 = R_1 - \frac{n_1(n_1+1)}{2}

    Where:

    - n_1, n_2: Sample sizes
    - R_1: Sum of ranks for sample 1

    **Effect Size (Rank-biserial correlation)**:

    .. math::
        r = 1 - \frac{2U_1}{n_1 n_2}

    Or equivalently:

    .. math::
        r = \frac{2(\bar{R}_2 - \bar{R}_1)}{n_1 + n_2}

    With this sign convention, r > 0 means that values of ``x`` tend to
    rank below values of ``y``, and r < 0 the opposite.

    Interpretation:

    - |r| < 0.1: negligible
    - |r| < 0.3: small
    - |r| < 0.5: medium
    - |r| ≥ 0.5: large

    **Advantages**:

    - No normality assumption required
    - Robust to outliers
    - Works with ordinal data
    - More powerful than t-test for non-normal data

    **When to use**:

    - Comparing two independent groups
    - Data violate normality
    - Presence of outliers
    - Ordinal data (e.g., Likert scales)
    - Small sample sizes

    **Comparison with other tests**:

    - vs t-test: More robust, less powerful when assumptions met
    - vs Brunner-Munzel: MWU assumes identical shape, BM does not
    - vs KS test: MWU tests location, KS tests entire distribution

    **Note on relationship to Brunner-Munzel**:
    Mann-Whitney U assumes samples have the same distribution shape
    (differing only in location). For more robust analysis without this
    assumption, use test_brunner_munzel() instead.

    References
    ----------
    .. [1] Mann, H. B., & Whitney, D. R. (1947). "On a test of whether one
       of two random variables is stochastically larger than the other".
       Annals of Mathematical Statistics, 18(1), 50-60.
    .. [2] Kerby, D. S. (2014). "The simple difference formula: An approach
       to teaching nonparametric correlation". Comprehensive Psychology, 3, 11.

    Examples
    --------
    >>> # Basic usage (completely separated samples)
    >>> x = np.array([1, 2, 3, 4, 5])
    >>> y = np.array([6, 7, 8, 9, 10])
    >>> result = test_mannwhitneyu(x, y)
    >>> result['significant']
    True
    >>> result['effect_size']
    1.0
    >>> # With auto-created figure
    >>> result = test_mannwhitneyu(x, y, plot=True)
    >>> # Plot on existing axes
    >>> import matplotlib.pyplot as plt
    >>> fig, ax = plt.subplots()
    >>> result = test_mannwhitneyu(x, y, ax=ax)
    >>> # With verbose output
    >>> result = test_mannwhitneyu(x, y, verbose=True)
    """
    # Resolve column names from DataFrame (seaborn-style data= parameter)
    if data is not None:
        from scitex_stats._utils._csv_support import resolve_columns

        resolved = resolve_columns(data, x=x, y=y)
        x, y = resolved["x"], resolved["y"]

    from scitex_stats._utils._formatters import p2stars
    from scitex_stats._utils._normalizers import convert_results, force_dataframe

    # Coerce to float first: np.isnan raises TypeError on object-dtype input
    # (e.g. lists containing None), whereas float coercion maps None to NaN.
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    x = x[~np.isnan(x)]
    y = y[~np.isnan(y)]
    n_x = len(x)
    n_y = len(y)

    # Fail early with a clear message; otherwise the effect-size formula
    # below would divide by n_x * n_y == 0.
    if n_x == 0 or n_y == 0:
        raise ValueError(
            "Both samples must contain at least one non-NaN value "
            f"(got n_x={n_x}, n_y={n_y})."
        )

    # Perform Mann-Whitney U test
    u_result = stats.mannwhitneyu(x, y, alternative=alternative)
    u_stat = float(u_result.statistic)
    pvalue = float(u_result.pvalue)

    # Determine rejection on the unrounded p-value
    rejected = pvalue < alpha

    # Rank-biserial correlation effect size: r = 1 - (2U) / (n1 * n2)
    r = 1 - (2 * u_stat) / (n_x * n_y)

    # Interpret effect size by magnitude only (sign is ignored here)
    r_abs = abs(r)
    if r_abs < 0.1:
        effect_interp = "negligible"
    elif r_abs < 0.3:
        effect_interp = "small"
    elif r_abs < 0.5:
        effect_interp = "medium"
    else:
        effect_interp = "large"

    # Compile results
    result = {
        "test_method": "Mann-Whitney U test",
        "statistic": round(u_stat, decimals),
        "stat_symbol": "U",
        "n_x": n_x,
        "n_y": n_y,
        "var_x": var_x,
        "var_y": var_y,
        "pvalue": round(pvalue, decimals),
        "stars": p2stars(pvalue),
        "alpha": alpha,
        "significant": rejected,
        "effect_size": round(r, decimals),
        "effect_size_metric": "rank-biserial correlation",
        "effect_size_interpretation": effect_interp,
        "H0": f"Distributions of {var_x} and {var_y} have equal medians",
    }

    # Add recommendation
    if rejected:
        result["recommendation"] = (
            f"{var_x} and {var_y} have significantly different medians."
        )
    else:
        result["recommendation"] = "No significant difference in medians detected."

    # Log results if verbose
    if verbose:
        logger.info(
            f"Mann-Whitney U: U = {u_stat:.3f}, p = {pvalue:.4f} {p2stars(pvalue)}"
        )
        logger.info(f"Rank-biserial r = {r:.3f} ({effect_interp})")

    # Passing an axes implies plotting; create a figure only when none is given
    if plot or ax is not None:
        if ax is None:
            _fig, ax = _mpl_plt.subplots()
        _plot_mannwhitneyu(x, y, var_x, var_y, result, ax)

    # Convert to requested format
    if return_as == "dataframe":
        return force_dataframe(result)
    if return_as != "dict":
        # Unknown formats are delegated to the shared converter
        return convert_results(result, return_as=return_as)
    return result
def _plot_mannwhitneyu(x, y, var_x, var_y, result, ax):
    """Render the Mann-Whitney U figure (violin+swarm) on ``ax``.

    Drawing is delegated to the ``scitex_stats._plot_helpers`` functions:
    the two samples are placed at positions 0 and 1 and labelled with
    ``var_x`` / ``var_y``, a significance bracket carrying
    ``result["stars"]`` is added between them, and a text box lists U, p and
    the sample sizes taken from ``result``. Finally the axes title is set.
    """
    from scitex_stats._plot_helpers import (
        significance_bracket,
        stats_text_box,
        violin_swarm,
    )

    positions = [0, 1]
    labels = [var_x, var_y]
    violin_swarm(ax, [x, y], positions, labels)

    significance_bracket(ax, 0, 1, result["stars"], [x, y])

    # Assemble the annotation lines shown in the stats box.
    u_line = fmt_stat("U", result["statistic"])
    p_line = fmt_stat("p", result["pvalue"], fmt=".4f", stars=result["stars"])
    n_line = f"{fmt_sym('n_1')} = {result['n_x']}, {fmt_sym('n_2')} = {result['n_y']}"
    stats_text_box(ax, [u_line, p_line, n_line])

    ax.set_title("Mann-Whitney U Test")
def main(args):  # noqa: C901
    """Run the Mann-Whitney U demonstration and return exit status 0.

    Walks through nine logged examples on synthetic data drawn from NumPy's
    global random state (seeded with 42), saves one example figure, and
    exports a subset of the results to Excel and CSV in the current
    directory. ``args`` is accepted for the caller's convenience but is not
    used inside this function.
    """
    import scitex as stx

    logger.info("Demonstrating Mann-Whitney U test")

    # Seed the global NumPy RNG; every draw below consumes it in order.
    np.random.seed(42)
    draw = np.random

    # Results gathered here are exported together in Example 9.
    exported = []

    # Example 1: Basic usage
    logger.info("\n=== Example 1: Basic usage ===")
    group_a = draw.normal(5, 1, 30)
    group_b = draw.normal(6, 1, 30)
    exported.append(
        test_mannwhitneyu(
            group_a, group_b, var_x="Group A", var_y="Group B", verbose=True
        )
    )

    # Example 2: Non-normal data
    logger.info("\n=== Example 2: Non-normal (skewed) data ===")
    skew_a = draw.exponential(2, 40)
    skew_b = draw.exponential(3, 40)
    exported.append(
        test_mannwhitneyu(
            skew_a,
            skew_b,
            var_x="Exp(λ=0.5)",
            var_y="Exp(λ=0.33)",
            verbose=True,
        )
    )

    # Example 3: With outliers
    logger.info("\n=== Example 3: Data with outliers ===")
    contaminated = np.concatenate([draw.normal(0, 1, 35), [10, 12]])
    clean = draw.normal(0.5, 1, 40)
    exported.append(
        test_mannwhitneyu(
            contaminated,
            clean,
            var_x="With Outliers",
            var_y="Normal",
            verbose=True,
        )
    )
    logger.info("Mann-Whitney U is robust to outliers")

    # Example 4: Ordinal data (Likert scale)
    logger.info("\n=== Example 4: Ordinal data (Likert scale) ===")
    scale = [1, 2, 3, 4, 5]
    likert1 = draw.choice(scale, size=50, p=[0.05, 0.15, 0.40, 0.30, 0.10])
    likert2 = draw.choice(scale, size=50, p=[0.05, 0.10, 0.25, 0.35, 0.25])
    exported.append(
        test_mannwhitneyu(
            likert1,
            likert2,
            var_x="Condition A",
            var_y="Condition B",
            verbose=True,
        )
    )
    logger.info(f"Medians: {np.median(likert1):.1f} vs {np.median(likert2):.1f}")

    # Example 5: One-sided tests
    logger.info("\n=== Example 5: One-sided tests ===")
    lower = draw.normal(5, 1, 40)
    higher = draw.normal(6, 1, 40)
    for banner, side in (
        ("Two-sided:", "two-sided"),
        ("\nOne-sided (less):", "less"),
    ):
        logger.info(banner)
        test_mannwhitneyu(lower, higher, alternative=side, verbose=True)

    # Example 6: With visualization
    logger.info("\n=== Example 6: Complete analysis with visualization ===")
    gamma_a = draw.gamma(2, 2, 50)
    gamma_b = draw.gamma(3, 2, 50)
    exported.append(
        test_mannwhitneyu(
            gamma_a,
            gamma_b,
            var_x="Gamma(k=2)",
            var_y="Gamma(k=3)",
            plot=True,
            verbose=True,
        )
    )
    current_figure = _mpl_plt.gcf()
    stx.io.save(current_figure, "./mannwhitneyu_example6.jpg")
    _mpl_plt.close()

    # Example 7: Comparison with t-test
    logger.info("\n=== Example 7: Mann-Whitney U vs t-test ===")
    from ..parametric._test_ttest import test_ttest_ind

    # Normal data - both tests should agree
    normal_a = draw.normal(5, 1, 50)
    normal_b = draw.normal(5.5, 1, 50)
    logger.info("Mann-Whitney U:")
    test_mannwhitneyu(normal_a, normal_b, verbose=True)
    logger.info("\nt-test:")
    test_ttest_ind(normal_a, normal_b, verbose=True)

    # Non-normal data - MWU more appropriate
    expo_a = draw.exponential(2, 50)
    expo_b = draw.exponential(2.5, 50)
    logger.info("\nFor exponential data:")
    logger.info("Mann-Whitney U:")
    test_mannwhitneyu(expo_a, expo_b, verbose=True)
    logger.info("\nt-test:")
    test_ttest_ind(expo_a, expo_b, verbose=True)
    logger.info("Mann-Whitney U is more reliable for non-normal data")

    # Example 8: Comparison with Brunner-Munzel
    logger.info("\n=== Example 8: Mann-Whitney U vs Brunner-Munzel ===")
    from ._test_brunner_munzel import test_brunner_munzel

    # Same shape distributions
    same_a = draw.normal(5, 1, 50)
    same_b = draw.normal(6, 1, 50)
    mwu = test_mannwhitneyu(same_a, same_b)
    bm = test_brunner_munzel(same_a, same_b)
    logger.info("Same distribution shape:")
    logger.info(
        f" Mann-Whitney U: p = {mwu['pvalue']:.4f}, r = {mwu['effect_size']:.3f}"
    )
    logger.info(
        f" Brunner-Munzel: p = {bm['pvalue']:.4f}, P(X>Y) = {bm['effect_size']:.3f}"
    )

    # Different shapes (second sample has a larger variance)
    narrow = draw.normal(5, 1, 50)
    wide = draw.normal(6, 3, 50)
    mwu2 = test_mannwhitneyu(narrow, wide)
    bm2 = test_brunner_munzel(narrow, wide)
    logger.info("\nDifferent distribution shapes:")
    logger.info(f" Mann-Whitney U: p = {mwu2['pvalue']:.4f}")
    logger.info(f" Brunner-Munzel: p = {bm2['pvalue']:.4f}")
    logger.info(" Note: Brunner-Munzel is more appropriate for different shapes")

    # Example 9: Export results
    logger.info("\n=== Example 9: Export results ===")
    from scitex_stats._utils._normalizers import force_dataframe

    df = force_dataframe(exported)
    logger.info(f"\nDataFrame shape: {df.shape}")
    df.to_excel("./mannwhitneyu_tests.xlsx", index=False)
    logger.info("Results exported to Excel")
    df.to_csv("./mannwhitneyu_tests.csv", index=False)
    logger.info("Results exported to CSV")

    return 0
def parse_args():
    """Build the demo's command-line parser and parse ``sys.argv``.

    The only option is the boolean flag ``--verbose``. Returns the resulting
    ``argparse.Namespace``.
    """
    cli = argparse.ArgumentParser(description="Demonstrate Mann-Whitney U test")
    cli.add_argument(
        "--verbose",
        action="store_true",
        help="Enable verbose output",
    )
    namespace = cli.parse_args()
    return namespace
def run_main():
    """Start a SciTeX session, run ``main``, then close the session.

    Command-line arguments are parsed first. ``stx.session.start`` returns a
    6-tuple; its second and third items are assigned to ``sys.stdout`` and
    ``sys.stderr``. The status returned by ``main`` is handed to
    ``stx.session.close``.
    """
    import sys

    import matplotlib.pyplot as plt
    import scitex as stx

    cli_args = parse_args()
    be_verbose = cli_args.verbose

    session = stx.session.start(
        sys,
        plt,
        args=cli_args,
        file=__file__,
        verbose=be_verbose,
        agg=True,
    )
    # Unpacking rebinds sys.stdout / sys.stderr to the session's streams.
    CONFIG, sys.stdout, sys.stderr, plt, _CC, _rng_manager = session

    status = main(cli_args)

    stx.session.close(
        CONFIG,
        verbose=be_verbose,
        exit_status=status,
    )
# Run the demonstration only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    run_main()
# EOF