#!/usr/bin/env python3
# Timestamp: "2025-10-01 22:43:58 (ywatanabe)"
# File: scitex_stats/tests/nonparametric/_test_friedman.py
r"""Friedman test for repeated measures (non-parametric).
Functionalities:
- Perform Friedman test for repeated measures (non-parametric)
- Non-parametric alternative to repeated measures ANOVA
- Test differences across 3+ related samples
- Compute Kendall's W (coefficient of concordance)
- Generate rank-based visualizations
Dependencies:
- packages: numpy, pandas, scipy, matplotlib
IO:
- input: Data in wide or long format (subjects × conditions)
- output: Test results (dict or DataFrame) and optional figure
"""
from __future__ import annotations
import os
from typing import List, Literal, Optional, Union
import matplotlib.axes
import numpy as np
import pandas as pd
import scitex as stx
from scipy import stats
from scitex_stats._logging import getLogger
from scitex_stats._utils._formatters import fmt_stat, p2stars
__FILE__ = __file__
__DIR__ = os.path.dirname(__FILE__)
logger = getLogger(__name__)
def kendall_w(ranks: np.ndarray) -> float:
    """
    Compute Kendall's W (coefficient of concordance), corrected for ties.

    Parameters
    ----------
    ranks : array-like, shape (n_subjects, n_conditions)
        Rank matrix; each row holds one subject's ranks across conditions.
        Tied observations within a row are expected to share the same
        (mid-)rank value, which is how ties are detected here.

    Returns
    -------
    W : float
        Kendall's W (0 to 1). NaN when W is undefined (no subjects or
        fewer than two conditions). 0.0 when every subject ties all
        conditions (no variation between rank sums).

    Raises
    ------
    ValueError
        If ``ranks`` is not two-dimensional.

    Notes
    -----
    W = 0: No agreement among subjects
    W = 1: Complete agreement among subjects

    With ties the denominator is reduced by ``n * sum(t**3 - t)``, where
    ``t`` runs over the sizes of the tie groups of every subject. Without
    ties this reduces to ``12 * S / (n**2 * (k**3 - k))``. The corrected
    value satisfies ``W = chi2 / (n * (k - 1))`` for the tie-corrected
    Friedman chi-square statistic.
    """
    ranks = np.asarray(ranks, dtype=float)
    if ranks.ndim != 2:
        raise ValueError("ranks must be 2D (subjects × conditions)")
    n, k = ranks.shape

    # W is undefined without subjects or with a single condition; return NaN
    # explicitly instead of triggering a divide-by-zero warning.
    if n == 0 or k < 2:
        return float("nan")

    # Sum of ranks for each condition and squared deviations from their mean
    R = ranks.sum(axis=0)
    S = np.sum((R - R.mean()) ** 2)

    # Tie term: sum of (t^3 - t) over tie groups within each subject's row
    tie_term = 0.0
    for row in ranks:
        _, counts = np.unique(row, return_counts=True)
        tie_term += float(np.sum(counts**3 - counts))

    denominator = n**2 * (k**3 - k) - n * tie_term
    # The denominator vanishes only when every row is one single tie group,
    # in which case all rank sums are equal (S == 0): report no concordance.
    if denominator <= 0:
        return 0.0

    return float(12 * S / denominator)
def interpret_kendall_w(W: float) -> str:
    """Return the verbal agreement label for a Kendall's W value.

    Each label applies to values strictly below its upper bound; anything
    that is not below 0.7 (including values failing every comparison) is
    reported as "very strong agreement".
    """
    labelled_bounds = (
        (0.1, "negligible agreement"),
        (0.3, "weak agreement"),
        (0.5, "moderate agreement"),
        (0.7, "strong agreement"),
    )
    for upper_bound, label in labelled_bounds:
        if W < upper_bound:
            return label
    return "very strong agreement"
[docs]
def test_friedman(  # noqa: C901
    data: Union[np.ndarray, pd.DataFrame],
    subject_col: Optional[str] = None,
    condition_col: Optional[str] = None,
    value_col: Optional[str] = None,
    condition_names: Optional[List[str]] = None,
    alpha: float = 0.05,
    plot: bool = False,
    ax: Optional[matplotlib.axes.Axes] = None,
    return_as: Literal["dict", "dataframe"] = "dict",
    decimals: int = 3,
    verbose: bool = False,
) -> Union[dict, pd.DataFrame]:
    r"""
    Perform Friedman test for repeated measures (non-parametric).

    Non-parametric alternative to repeated measures ANOVA. Tests whether
    distributions differ across 3+ related samples using ranks.

    Parameters
    ----------
    data : array or DataFrame
        - If array: shape (n_subjects, n_conditions), wide format
        - If DataFrame with subject_col, condition_col and value_col:
          long format (pivoted to wide internally)
        - If DataFrame without them: wide format
          (rows=subjects, cols=conditions)
    subject_col : str, optional
        Column name for subject IDs (long format)
    condition_col : str, optional
        Column name for conditions (long format)
    value_col : str, optional
        Column name for values (long format)
    condition_names : list of str, optional
        Names for conditions (wide format). Defaults to the DataFrame
        column labels, or "Condition 1", "Condition 2", ... for arrays.
    alpha : float, default 0.05
        Significance level
    plot : bool, default False
        Whether to generate visualization
    ax : matplotlib.axes.Axes, optional
        Axes object to plot on. If None and plot=True, creates new figure.
        If provided, automatically enables plotting.
    return_as : {'dict', 'dataframe'}, default 'dict'
        Output format
    decimals : int, default 3
        Number of decimal places for rounding (the p-value is rounded to
        ``decimals + 1`` places)
    verbose : bool, default False
        Whether to log test results

    Returns
    -------
    result : dict or DataFrame
        Test results including:
        - test: Name of the test
        - statistic: Chi-square statistic (Friedman's χ²)
        - pvalue: p-value
        - df: Degrees of freedom (k - 1)
        - kendall_w: Kendall's W (coefficient of concordance)
        - effect_size: Kendall's W
        - effect_size_metric: "kendall_w"
        - effect_size_interpretation: interpretation
        - n_subjects: Number of subjects
        - n_conditions: Number of conditions
        - condition_names: Names of the conditions
        - mean_ranks: Mean rank for each condition
        - alpha: Significance level used
        - significant: Whether to reject null hypothesis (bool)
        - stars: Significance stars for the p-value
        - H0: Statement of the null hypothesis

    Raises
    ------
    ValueError
        If only some of subject_col/condition_col/value_col are given for
        a DataFrame, if the data are not 2D, if there are fewer than 3
        conditions, or if there are fewer than 2 subjects.

    Notes
    -----
    The Friedman test is the non-parametric alternative to repeated measures
    ANOVA. It is used when:

    - Normality assumption is violated
    - Data are ordinal (e.g., Likert scales)
    - Sample sizes are small

    **Null Hypothesis (H0)**: All conditions have the same distribution

    **Alternative Hypothesis (H1)**: At least one condition differs

    **Procedure**:

    1. Rank observations within each subject (across conditions)
    2. Compute sum of ranks for each condition
    3. Calculate test statistic based on rank sums

    **Test Statistic**:

    .. math::
        \chi^2_F = \frac{12}{nk(k+1)} \sum_{j=1}^{k} R_j^2 - 3n(k+1)

    Where:

    - n: Number of subjects
    - k: Number of conditions
    - R_j: Sum of ranks for condition j

    **Effect Size (Kendall's W)**:

    .. math::
        W = \frac{12 \sum_{j=1}^{k}(R_j - \bar{R})^2}{n^2(k^3 - k)}

    Interpretation:

    - W < 0.1: negligible agreement
    - W < 0.3: weak agreement
    - W < 0.5: moderate agreement
    - W < 0.7: strong agreement
    - W ≥ 0.7: very strong agreement

    **Assumptions**:

    - Paired/repeated observations (same subjects)
    - At least ordinal scale data
    - 3+ conditions (for 2 conditions, use Wilcoxon signed-rank test)

    **Post-hoc tests**:

    If significant:

    - Pairwise Wilcoxon signed-rank tests
    - Apply corrections: correct_bonferroni(), correct_holm()

    **Advantages**:

    - No normality assumption
    - Robust to outliers
    - Works with ordinal data
    - No sphericity assumption

    **Disadvantages**:

    - Less powerful than RM-ANOVA when assumptions are met
    - Requires at least ordinal data
    - Sensitive to ties

    Examples
    --------
    >>> import numpy as np
    >>> from scitex_stats.tests.nonparametric import test_friedman
    >>>
    >>> # Example: Pain ratings (ordinal) across 4 time points
    >>> data = np.array([
    ...     [7, 6, 5, 4],  # Subject 1
    ...     [8, 7, 6, 5],  # Subject 2
    ...     [6, 5, 4, 3],  # Subject 3
    ...     [9, 8, 7, 6],  # Subject 4
    ... ])
    >>>
    >>> result = test_friedman(
    ...     data,
    ...     condition_names=['Baseline', '1 week', '2 weeks', '3 weeks'],
    ...     plot=True
    ... )
    >>>
    >>> print(f"χ² = {result['statistic']:.2f}, p = {result['pvalue']:.4f}")
    >>> print(f"Kendall's W = {result['kendall_w']:.3f}")

    References
    ----------
    .. [1] Friedman, M. (1937). "The use of ranks to avoid the assumption of
           normality implicit in the analysis of variance". Journal of the
           American Statistical Association, 32(200), 675-701.
    .. [2] Kendall, M. G., & Babington Smith, B. (1939). "The problem of m
           rankings". The Annals of Mathematical Statistics, 10(3), 275-287.

    See Also
    --------
    test_anova_rm : Parametric alternative (repeated measures ANOVA)
    test_wilcoxon : For 2 related samples
    test_kruskal : For 3+ independent samples
    """
    # Convert data to wide format array
    if isinstance(data, pd.DataFrame):
        long_format_cols = (subject_col, condition_col, value_col)
        if all(col is not None for col in long_format_cols):
            # Long format - pivot to wide
            data_wide = data.pivot(
                index=subject_col, columns=condition_col, values=value_col
            )
            data_array = data_wide.values
            if condition_names is None:
                condition_names = list(data_wide.columns)
        elif any(col is not None for col in long_format_cols):
            # A partial specification would otherwise be treated as wide
            # format, silently mixing ID/label columns into the data.
            raise ValueError(
                "Long format requires subject_col, condition_col and "
                "value_col to all be provided"
            )
        else:
            # Already wide format
            data_array = data.values
            if condition_names is None:
                condition_names = list(data.columns)
    else:
        data_array = np.asarray(data)

    if data_array.ndim != 2:
        raise ValueError("Data must be 2D (subjects × conditions)")

    n_subjects, n_conditions = data_array.shape
    if n_conditions < 3:
        raise ValueError(
            "Friedman test requires at least 3 conditions. Use test_wilcoxon for 2 conditions."
        )
    if n_subjects < 2:
        raise ValueError("Need at least 2 subjects")

    if condition_names is None:
        condition_names = [f"Condition {i + 1}" for i in range(n_conditions)]

    # Perform Friedman test (one sample per condition)
    statistic, pvalue = stats.friedmanchisquare(*data_array.T)

    # Rank each subject's observations across conditions. rankdata returns a
    # float array, so tied mid-ranks (e.g. 1.5) are preserved even when the
    # input has an integer dtype.
    ranks = stats.rankdata(data_array, axis=1)

    # Compute mean ranks for each condition
    mean_ranks = ranks.mean(axis=0)

    # Compute Kendall's W
    W = kendall_w(ranks)
    W_interpretation = interpret_kendall_w(W)

    # Degrees of freedom
    df = n_conditions - 1

    # Build result dictionary
    result = {
        "test": "Friedman test",
        "statistic": round(float(statistic), decimals),
        "pvalue": round(float(pvalue), decimals + 1),
        "df": int(df),
        "kendall_w": round(float(W), decimals),
        "effect_size": round(float(W), decimals),
        "effect_size_metric": "kendall_w",
        "effect_size_interpretation": W_interpretation,
        "n_subjects": int(n_subjects),
        "n_conditions": int(n_conditions),
        "condition_names": condition_names,
        "mean_ranks": [round(float(r), decimals) for r in mean_ranks],
        "alpha": alpha,
        # Plain bool (not numpy.bool_) so the result serializes cleanly
        "significant": bool(pvalue < alpha),
        "stars": p2stars(pvalue),
        "H0": "All conditions have the same distribution",
    }

    # Log results if verbose
    if verbose:
        logger.info(
            f"Friedman: χ² = {statistic:.3f}, p = {pvalue:.4f} {p2stars(pvalue)}"
        )
        logger.info(f"Kendall's W = {W:.3f} ({W_interpretation})")

    # Auto-enable plotting if ax is provided
    if ax is not None:
        plot = True

    # Generate plot if requested
    if plot:
        if ax is None:
            _fig, ax = stx.plt.subplots()
        _plot_friedman(data_array, ranks, result, condition_names, ax)

    # Return based on format
    if return_as == "dataframe":
        return pd.DataFrame([result])
    return result
def _plot_friedman(data, ranks, result, condition_names, ax):
    """Draw the Friedman result panel (violin+swarm per condition) on ``ax``.

    Parameters
    ----------
    data : ndarray, shape (n_subjects, n_conditions)
        Wide-format observations; each column is passed to the plot helper
        as one group.
    ranks : ndarray
        Accepted for signature compatibility; not used by this function.
    result : dict
        Test result; the keys "significant", "stars", "statistic",
        "pvalue" and "kendall_w" are read.
    condition_names : list
        Labels for the groups and x tick labels.
    ax : matplotlib.axes.Axes
        Axes to draw on.
    """
    from scitex_stats._plot_helpers import (
        significance_bracket,
        stats_text_box,
        violin_swarm,
    )

    _, n_conditions = data.shape
    x_positions = list(range(n_conditions))
    # One 1-D array per condition (the columns of the wide matrix)
    per_condition = list(data.T)

    violin_swarm(ax, per_condition, x_positions, condition_names)

    # Bracket spans the first and last condition when the test is significant
    if result["significant"]:
        last_position = n_conditions - 1
        significance_bracket(ax, 0, last_position, result["stars"], per_condition)

    summary_lines = [
        fmt_stat("chi2", result["statistic"]),
        fmt_stat("p", result["pvalue"], fmt=".4f", stars=result["stars"]),
        fmt_stat("W", result["kendall_w"]),
    ]
    stats_text_box(ax, summary_lines)

    ax.set_xticklabels(condition_names, rotation=45, ha="right")
    ax.set_title("Friedman Test")
    ax.grid(True, alpha=0.3, axis="y")
# EOF