#!/usr/bin/env python3
# Timestamp: "2025-10-01 22:43:58 (ywatanabe)"
# File: scitex_stats/tests/nonparametric/_test_friedman.py
r"""Friedman test for repeated measures (non-parametric).
Functionalities:
- Perform Friedman test for repeated measures (non-parametric)
- Non-parametric alternative to repeated measures ANOVA
- Test differences across 3+ related samples
- Compute Kendall's W (coefficient of concordance)
- Generate an optional violin + swarm visualization of the per-condition data
Dependencies:
- packages: numpy, pandas, scipy, matplotlib
IO:
- input: Data in wide or long format (subjects × conditions)
- output: Test results (dict or DataFrame) and optional figure
"""
from __future__ import annotations
import os
from typing import List, Literal, Optional, Union
import matplotlib.axes
import numpy as np
import pandas as pd
import matplotlib.pyplot as _mpl_plt # noqa: E402
from scipy import stats
from scitex_stats._logging import getLogger
from scitex_stats._utils._formatters import fmt_stat, p2stars
# Path of this module file, re-exported under an upper-case alias.
__FILE__ = __file__
# Directory containing this module file.
__DIR__ = os.path.dirname(__FILE__)
# Module-level logger; test_friedman() writes to it when verbose=True.
logger = getLogger(__name__)
def kendall_w(ranks: np.ndarray) -> float:
    """
    Compute Kendall's W (coefficient of concordance) from a rank matrix.

    Parameters
    ----------
    ranks : array, shape (n_subjects, n_conditions)
        Within-subject ranks; one row per subject, one column per condition.

    Returns
    -------
    W : float
        ``12 * S / (n**2 * (k**3 - k))`` where ``S`` is the sum of squared
        deviations of the per-condition rank sums from their mean, ``n`` is
        the number of rows and ``k`` the number of columns.

    Notes
    -----
    W = 0: No agreement among subjects
    W = 1: Complete agreement among subjects

    No correction for tied ranks is applied.
    """
    n_subjects, n_conditions = ranks.shape

    # Column totals: how much rank each condition accumulated over subjects.
    rank_sums = ranks.sum(axis=0)

    # Spread of the column totals around their mean.
    deviations = rank_sums - rank_sums.mean()
    sum_sq_dev = np.sum(deviations**2)

    # Normalising constant: the largest value 12 * S can reach.
    denominator = n_subjects**2 * (n_conditions**3 - n_conditions)

    return float((12 * sum_sq_dev) / denominator)
def interpret_kendall_w(W: float) -> str:
    """Map a Kendall's W value to a verbal agreement label.

    The label is chosen by the first exclusive upper bound that ``W`` falls
    below: 0.1 (negligible), 0.3 (weak), 0.5 (moderate), 0.7 (strong).
    Anything that is not below 0.7 is labelled "very strong agreement".
    """
    thresholds = (
        (0.1, "negligible agreement"),
        (0.3, "weak agreement"),
        (0.5, "moderate agreement"),
        (0.7, "strong agreement"),
    )
    for upper_bound, label in thresholds:
        if W < upper_bound:
            return label
    return "very strong agreement"
def test_friedman(  # noqa: C901
    data: Union[np.ndarray, pd.DataFrame],
    subject_col: Optional[str] = None,
    condition_col: Optional[str] = None,
    value_col: Optional[str] = None,
    condition_names: Optional[List[str]] = None,
    alpha: float = 0.05,
    plot: bool = False,
    ax: Optional[matplotlib.axes.Axes] = None,
    return_as: Literal["dict", "dataframe"] = "dict",
    decimals: int = 3,
    verbose: bool = False,
) -> Union[dict, pd.DataFrame]:
    r"""
    Perform Friedman test for repeated measures (non-parametric).

    Non-parametric alternative to repeated measures ANOVA. Tests whether
    distributions differ across 3+ related samples using ranks.

    Parameters
    ----------
    data : array or DataFrame
        - If array: shape (n_subjects, n_conditions), wide format
        - If DataFrame with subject_col/condition_col/value_col all given:
          long format, pivoted to wide internally
        - If DataFrame without: wide format (rows=subjects, cols=conditions)
    subject_col : str, optional
        Column name for subject IDs (long format)
    condition_col : str, optional
        Column name for conditions (long format)
    value_col : str, optional
        Column name for values (long format)
    condition_names : list of str, optional
        Names for conditions (wide format). Defaults to the DataFrame column
        labels, or "Condition 1", "Condition 2", ... for arrays.
    alpha : float, default 0.05
        Significance level
    plot : bool, default False
        Whether to generate visualization
    ax : matplotlib.axes.Axes, optional
        Axes object to plot on. If None and plot=True, creates new figure.
        If provided, automatically enables plotting.
    return_as : {'dict', 'dataframe'}, default 'dict'
        Output format. 'dataframe' returns a one-row DataFrame; any other
        value returns the dict.
    decimals : int, default 3
        Number of decimal places for rounding (the p-value is rounded to
        ``decimals + 1`` places)
    verbose : bool, default False
        Whether to log test results

    Returns
    -------
    result : dict or DataFrame
        Test results including:
        - test: Name of the test ("Friedman test")
        - statistic: Chi-square statistic (Friedman's χ²)
        - pvalue: p-value
        - df: Degrees of freedom (k - 1)
        - kendall_w: Kendall's W (coefficient of concordance)
        - effect_size: Kendall's W
        - effect_size_metric: "kendall_w"
        - effect_size_interpretation: interpretation
        - n_subjects: Number of subjects
        - n_conditions: Number of conditions
        - condition_names: Names of the conditions
        - mean_ranks: Mean rank for each condition
        - alpha: Significance level used
        - significant: Whether to reject null hypothesis (built-in bool)
        - stars: Significance stars for the p-value
        - H0: Text of the null hypothesis

    Raises
    ------
    ValueError
        If the data are not 2D, have fewer than 3 conditions, or have fewer
        than 2 subjects.

    Notes
    -----
    The Friedman test is the non-parametric alternative to repeated measures
    ANOVA. It is used when:
    - Normality assumption is violated
    - Data are ordinal (e.g., Likert scales)
    - Sample sizes are small

    **Null Hypothesis (H0)**: All conditions have the same distribution

    **Alternative Hypothesis (H1)**: At least one condition differs

    **Procedure**:
    1. Rank observations within each subject (across conditions)
    2. Compute sum of ranks for each condition
    3. Calculate test statistic based on rank sums

    **Test Statistic**:

    .. math::
        \chi^2_F = \frac{12}{nk(k+1)} \sum_{j=1}^{k} R_j^2 - 3n(k+1)

    Where:
    - n: Number of subjects
    - k: Number of conditions
    - R_j: Sum of ranks for condition j

    **Effect Size (Kendall's W)**:

    .. math::
        W = \frac{12 \sum_{j=1}^{k}(R_j - \bar{R})^2}{n^2(k^3 - k)}

    Interpretation:
    - W < 0.1: negligible agreement
    - W < 0.3: weak agreement
    - W < 0.5: moderate agreement
    - W < 0.7: strong agreement
    - W ≥ 0.7: very strong agreement

    **Assumptions**:
    - Paired/repeated observations (same subjects)
    - At least ordinal scale data
    - 3+ conditions (for 2 conditions, use Wilcoxon signed-rank test)

    **Post-hoc tests**:
    If significant:
    - Pairwise Wilcoxon signed-rank tests
    - Apply corrections: correct_bonferroni(), correct_holm()

    **Advantages**:
    - No normality assumption
    - Robust to outliers
    - Works with ordinal data
    - No sphericity assumption

    **Disadvantages**:
    - Less powerful than RM-ANOVA when assumptions are met
    - Requires at least ordinal data
    - Sensitive to ties

    Examples
    --------
    >>> import numpy as np
    >>> from scitex_stats.tests.nonparametric import test_friedman
    >>>
    >>> # Example: Pain ratings (ordinal) across 4 time points
    >>> data = np.array([
    ...     [7, 6, 5, 4],  # Subject 1
    ...     [8, 7, 6, 5],  # Subject 2
    ...     [6, 5, 4, 3],  # Subject 3
    ...     [9, 8, 7, 6],  # Subject 4
    ... ])
    >>>
    >>> result = test_friedman(
    ...     data,
    ...     condition_names=['Baseline', '1 week', '2 weeks', '3 weeks'],
    ...     plot=True
    ... )
    >>>
    >>> print(f"χ² = {result['statistic']:.2f}, p = {result['pvalue']:.4f}")
    >>> print(f"Kendall's W = {result['kendall_w']:.3f}")

    References
    ----------
    .. [1] Friedman, M. (1937). "The use of ranks to avoid the assumption of
           normality implicit in the analysis of variance". Journal of the
           American Statistical Association, 32(200), 675-701.
    .. [2] Kendall, M. G., & Babington Smith, B. (1939). "The problem of m
           rankings". The Annals of Mathematical Statistics, 10(3), 275-287.

    See Also
    --------
    test_anova_rm : Parametric alternative (repeated measures ANOVA)
    test_wilcoxon : For 2 related samples
    test_kruskal : For 3+ independent samples
    """
    # Convert data to wide format array
    if isinstance(data, pd.DataFrame):
        if (
            subject_col is not None
            and condition_col is not None
            and value_col is not None
        ):
            # Long format - pivot to wide
            data_wide = data.pivot(
                index=subject_col, columns=condition_col, values=value_col
            )
            data_array = data_wide.values
            if condition_names is None:
                condition_names = list(data_wide.columns)
        else:
            # Already wide format
            data_array = data.values
            if condition_names is None:
                condition_names = list(data.columns)
    else:
        data_array = np.asarray(data)

    if data_array.ndim != 2:
        raise ValueError("Data must be 2D (subjects × conditions)")

    n_subjects, n_conditions = data_array.shape

    if n_conditions < 3:
        raise ValueError(
            "Friedman test requires at least 3 conditions. Use test_wilcoxon for 2 conditions."
        )
    if n_subjects < 2:
        raise ValueError("Need at least 2 subjects")

    if condition_names is None:
        condition_names = [f"Condition {i + 1}" for i in range(n_conditions)]

    # Perform Friedman test (one argument per condition column)
    statistic, pvalue = stats.friedmanchisquare(*data_array.T)

    # Rank each subject's observations across conditions. The rank matrix is
    # built as float on purpose: tied observations get fractional average
    # ranks (e.g. 2.5), which would be truncated if the array inherited an
    # integer dtype from the input data.
    ranks = np.array(
        [stats.rankdata(subject_row) for subject_row in data_array],
        dtype=float,
    )

    # Compute mean ranks for each condition
    mean_ranks = ranks.mean(axis=0)

    # Compute Kendall's W
    W = kendall_w(ranks)
    W_interpretation = interpret_kendall_w(W)

    # Degrees of freedom
    df = n_conditions - 1

    # Build result dictionary; values are cast to built-in types so the
    # result does not carry NumPy scalars.
    result = {
        "test": "Friedman test",
        "statistic": round(float(statistic), decimals),
        "pvalue": round(float(pvalue), decimals + 1),
        "df": int(df),
        "kendall_w": round(float(W), decimals),
        "effect_size": round(float(W), decimals),
        "effect_size_metric": "kendall_w",
        "effect_size_interpretation": W_interpretation,
        "n_subjects": int(n_subjects),
        "n_conditions": int(n_conditions),
        "condition_names": condition_names,
        "mean_ranks": [round(float(r), decimals) for r in mean_ranks],
        "alpha": alpha,
        "significant": bool(pvalue < alpha),
        "stars": p2stars(pvalue),
        "H0": "All conditions have the same distribution",
    }

    # Log results if verbose
    if verbose:
        logger.info(
            f"Friedman: χ² = {statistic:.3f}, p = {pvalue:.4f} {p2stars(pvalue)}"
        )
        logger.info(f"Kendall's W = {W:.3f} ({W_interpretation})")

    # Auto-enable plotting if ax is provided
    if ax is not None:
        plot = True

    # Generate plot if requested
    if plot:
        if ax is None:
            _fig, ax = _mpl_plt.subplots()
        _plot_friedman(data_array, ranks, result, condition_names, ax)

    # Return based on format
    if return_as == "dataframe":
        return pd.DataFrame([result])
    return result
def _plot_friedman(data, ranks, result, condition_names, ax):
    """Draw the Friedman test figure on ``ax``.

    One violin + swarm group is drawn per column of ``data``. When
    ``result["significant"]`` is truthy, a significance bracket labelled with
    ``result["stars"]`` is drawn from the first to the last condition. A text
    box with the chi-square statistic, p-value and Kendall's W is added, then
    tick labels, title and a horizontal grid are set.

    ``ranks`` is accepted but not used by this function.
    """
    from scitex_stats._plot_helpers import (
        significance_bracket,
        stats_text_box,
        violin_swarm,
    )

    # Unpacking also enforces that ``data`` is two-dimensional.
    _n_rows, n_groups = data.shape
    x_positions = list(range(n_groups))
    per_condition = [data[:, col] for col in range(n_groups)]

    violin_swarm(ax, per_condition, x_positions, condition_names)

    if result["significant"]:
        last_index = n_groups - 1
        significance_bracket(ax, 0, last_index, result["stars"], per_condition)

    summary_lines = [
        fmt_stat("chi2", result["statistic"]),
        fmt_stat("p", result["pvalue"], fmt=".4f", stars=result["stars"]),
        fmt_stat("W", result["kendall_w"]),
    ]
    stats_text_box(ax, summary_lines)

    ax.set_xticklabels(condition_names, rotation=45, ha="right")
    ax.set_title("Friedman Test")
    ax.grid(True, alpha=0.3, axis="y")
# EOF