Source code for scitex_stats.tests.nonparametric._test_friedman

#!/usr/bin/env python3
# Timestamp: "2025-10-01 22:43:58 (ywatanabe)"
# File: scitex_stats/tests/nonparametric/_test_friedman.py

r"""Friedman test for repeated measures (non-parametric).

Functionalities:
  - Perform Friedman test for repeated measures (non-parametric)
  - Non-parametric alternative to repeated measures ANOVA
  - Test differences across 3+ related samples
  - Compute Kendall's W (coefficient of concordance)
  - Generate rank-based visualizations

Dependencies:
  - packages: numpy, pandas, scipy, matplotlib

IO:
  - input: Data in wide or long format (subjects × conditions)
  - output: Test results (dict or DataFrame) and optional figure
"""

from __future__ import annotations

import os
from typing import List, Literal, Optional, Union

import matplotlib.axes
import numpy as np
import pandas as pd
import matplotlib.pyplot as _mpl_plt  # noqa: E402
from scipy import stats

from scitex_stats._logging import getLogger
from scitex_stats._utils._formatters import fmt_stat, p2stars

__FILE__ = __file__
__DIR__ = os.path.dirname(__FILE__)

logger = getLogger(__name__)


def kendall_w(ranks: np.ndarray) -> float:
    """
    Compute Kendall's W (coefficient of concordance).

    Parameters
    ----------
    ranks : array-like, shape (n_subjects, n_conditions)
        Rank matrix. Each row holds one subject's ranks across conditions.
        Any array-like accepted by ``numpy.asarray`` (e.g. a list of lists)
        is allowed.

    Returns
    -------
    W : float
        Kendall's W (0 to 1 for a valid rank matrix). ``nan`` is returned
        when W is undefined because the denominator ``n^2 (k^3 - k)`` is
        zero, i.e. there are no subjects or fewer than 2 conditions.

    Raises
    ------
    ValueError
        If ``ranks`` is not two-dimensional.

    Notes
    -----
    W = 0: No agreement among subjects
    W = 1: Complete agreement among subjects

    The uncorrected formula ``W = 12 S / (n^2 (k^3 - k))`` is used, where
    ``S`` is the sum of squared deviations of the per-condition rank sums
    from their mean. No correction for tied ranks is applied.
    """
    ranks = np.asarray(ranks, dtype=float)
    if ranks.ndim != 2:
        raise ValueError("ranks must be 2D (subjects × conditions)")

    n, k = ranks.shape

    # The denominator vanishes for n == 0 or k in {0, 1}; W is undefined
    # there, so return NaN explicitly instead of dividing by zero.
    denominator = n**2 * (k**3 - k)
    if denominator == 0:
        return float("nan")

    # Sum of ranks for each condition
    rank_sums = ranks.sum(axis=0)

    # Sum of squared deviations of the rank sums from their mean
    S = np.sum((rank_sums - rank_sums.mean()) ** 2)

    return float((12 * S) / denominator)


def interpret_kendall_w(W: float) -> str:
    """Map a Kendall's W value to a verbal agreement label.

    Each threshold is an exclusive upper bound: the first bound that ``W``
    is strictly below selects the label. Values of 0.7 and above (and any
    value for which every comparison is false) fall through to
    "very strong agreement".
    """
    labelled_bounds = (
        (0.1, "negligible agreement"),
        (0.3, "weak agreement"),
        (0.5, "moderate agreement"),
        (0.7, "strong agreement"),
    )
    for upper_bound, label in labelled_bounds:
        if W < upper_bound:
            return label
    return "very strong agreement"


def test_friedman(  # noqa: C901
    data: Union[np.ndarray, pd.DataFrame],
    subject_col: Optional[str] = None,
    condition_col: Optional[str] = None,
    value_col: Optional[str] = None,
    condition_names: Optional[List[str]] = None,
    alpha: float = 0.05,
    plot: bool = False,
    ax: Optional[matplotlib.axes.Axes] = None,
    return_as: Literal["dict", "dataframe"] = "dict",
    decimals: int = 3,
    verbose: bool = False,
) -> Union[dict, pd.DataFrame]:
    r"""
    Perform Friedman test for repeated measures (non-parametric).

    Non-parametric alternative to repeated measures ANOVA. Tests whether
    distributions differ across 3+ related samples using ranks.

    Parameters
    ----------
    data : array or DataFrame
        - If array: shape (n_subjects, n_conditions), wide format
        - If DataFrame with subject_col/condition_col: long format
        - If DataFrame without: wide format (rows=subjects, cols=conditions)

        A DataFrame is treated as long format only when ``subject_col``,
        ``condition_col`` and ``value_col`` are all given.
    subject_col : str, optional
        Column name for subject IDs (long format)
    condition_col : str, optional
        Column name for conditions (long format)
    value_col : str, optional
        Column name for values (long format)
    condition_names : list of str, optional
        Names for conditions (wide format)
    alpha : float, default 0.05
        Significance level
    plot : bool, default False
        Whether to generate visualization
    ax : matplotlib.axes.Axes, optional
        Axes object to plot on. If None and plot=True, creates new figure.
        If provided, automatically enables plotting.
    return_as : {'dict', 'dataframe'}, default 'dict'
        Output format
    decimals : int, default 3
        Number of decimal places for rounding
    verbose : bool, default False
        Whether to print test results

    Returns
    -------
    result : dict or DataFrame
        Test results including:
        - test: Name of the test ("Friedman test")
        - statistic: Chi-square statistic (Friedman's χ²)
        - pvalue: p-value (rounded to ``decimals + 1`` places)
        - df: Degrees of freedom (k - 1)
        - kendall_w: Kendall's W (coefficient of concordance)
        - effect_size: Kendall's W
        - effect_size_metric: "kendall_w"
        - effect_size_interpretation: interpretation
        - n_subjects: Number of subjects
        - n_conditions: Number of conditions
        - condition_names: Names of the conditions
        - mean_ranks: Mean rank for each condition
        - alpha: Significance level used
        - significant: Whether to reject null hypothesis (plain ``bool``)
        - stars: Significance stars for the p-value
        - H0: Statement of the null hypothesis

    Raises
    ------
    ValueError
        If the data are not 2D, have fewer than 3 conditions, or have
        fewer than 2 subjects.

    Notes
    -----
    The Friedman test is the non-parametric alternative to repeated
    measures ANOVA. It is used when:

    - Normality assumption is violated
    - Data are ordinal (e.g., Likert scales)
    - Sample sizes are small

    **Null Hypothesis (H0)**: All conditions have the same distribution

    **Alternative Hypothesis (H1)**: At least one condition differs

    **Procedure**:

    1. Rank observations within each subject (across conditions)
    2. Compute sum of ranks for each condition
    3. Calculate test statistic based on rank sums

    **Test Statistic**:

    .. math::
        \chi^2_F = \frac{12}{nk(k+1)} \sum_{j=1}^{k} R_j^2 - 3n(k+1)

    Where:

    - n: Number of subjects
    - k: Number of conditions
    - R_j: Sum of ranks for condition j

    **Effect Size (Kendall's W)**:

    .. math::
        W = \frac{12 \sum_{j=1}^{k}(R_j - \bar{R})^2}{n^2(k^3 - k)}

    Interpretation:

    - W < 0.1: negligible agreement
    - W < 0.3: weak agreement
    - W < 0.5: moderate agreement
    - W < 0.7: strong agreement
    - W ≥ 0.7: very strong agreement

    **Assumptions**:

    - Paired/repeated observations (same subjects)
    - At least ordinal scale data
    - 3+ conditions (for 2 conditions, use Wilcoxon signed-rank test)

    **Post-hoc tests**:

    If significant:

    - Pairwise Wilcoxon signed-rank tests
    - Apply corrections: correct_bonferroni(), correct_holm()

    **Advantages**:

    - No normality assumption
    - Robust to outliers
    - Works with ordinal data
    - No sphericity assumption

    **Disadvantages**:

    - Less powerful than RM-ANOVA when assumptions are met
    - Requires at least ordinal data
    - Sensitive to ties

    Examples
    --------
    >>> import numpy as np
    >>> from scitex_stats.tests.nonparametric import test_friedman
    >>>
    >>> # Example: Pain ratings (ordinal) across 4 time points
    >>> data = np.array([
    ...     [7, 6, 5, 4],  # Subject 1
    ...     [8, 7, 6, 5],  # Subject 2
    ...     [6, 5, 4, 3],  # Subject 3
    ...     [9, 8, 7, 6],  # Subject 4
    ... ])
    >>>
    >>> result = test_friedman(
    ...     data,
    ...     condition_names=['Baseline', '1 week', '2 weeks', '3 weeks'],
    ...     plot=True
    ... )
    >>>
    >>> print(f"χ² = {result['statistic']:.2f}, p = {result['pvalue']:.4f}")
    >>> print(f"Kendall's W = {result['kendall_w']:.3f}")

    References
    ----------
    .. [1] Friedman, M. (1937). "The use of ranks to avoid the assumption
           of normality implicit in the analysis of variance". Journal of
           the American Statistical Association, 32(200), 675-701.
    .. [2] Kendall, M. G., & Babington Smith, B. (1939). "The problem of m
           rankings". The Annals of Mathematical Statistics, 10(3), 275-287.

    See Also
    --------
    test_anova_rm : Parametric alternative (repeated measures ANOVA)
    test_wilcoxon : For 2 related samples
    test_kruskal : For 3+ independent samples
    """
    # Convert data to wide format array
    if isinstance(data, pd.DataFrame):
        if (
            subject_col is not None
            and condition_col is not None
            and value_col is not None
        ):
            # Long format - pivot to wide
            data_wide = data.pivot(
                index=subject_col, columns=condition_col, values=value_col
            )
            data_array = data_wide.values
            if condition_names is None:
                condition_names = list(data_wide.columns)
        else:
            # Already wide format
            data_array = data.values
            if condition_names is None:
                condition_names = list(data.columns)
    else:
        data_array = np.asarray(data)

    if data_array.ndim != 2:
        raise ValueError("Data must be 2D (subjects × conditions)")

    n_subjects, n_conditions = data_array.shape

    if n_conditions < 3:
        raise ValueError(
            "Friedman test requires at least 3 conditions. Use test_wilcoxon for 2 conditions."
        )

    if n_subjects < 2:
        raise ValueError("Need at least 2 subjects")

    if condition_names is None:
        condition_names = [f"Condition {i + 1}" for i in range(n_conditions)]

    # Perform Friedman test
    statistic, pvalue = stats.friedmanchisquare(*data_array.T)

    # Rank each subject's observations across conditions. rankdata returns
    # a float array, so the fractional average ranks produced by ties
    # (e.g. 1.5) are kept. Writing them into a buffer that inherits an
    # integer input dtype would silently truncate them and corrupt the
    # mean ranks and Kendall's W.
    ranks = stats.rankdata(data_array, axis=1)

    # Compute mean ranks for each condition
    mean_ranks = ranks.mean(axis=0)

    # Compute Kendall's W
    W = kendall_w(ranks)
    W_interpretation = interpret_kendall_w(W)

    # Degrees of freedom
    df = n_conditions - 1

    # Build result dictionary
    result = {
        "test": "Friedman test",
        "statistic": round(float(statistic), decimals),
        "pvalue": round(float(pvalue), decimals + 1),
        "df": int(df),
        "kendall_w": round(float(W), decimals),
        "effect_size": round(float(W), decimals),
        "effect_size_metric": "kendall_w",
        "effect_size_interpretation": W_interpretation,
        "n_subjects": int(n_subjects),
        "n_conditions": int(n_conditions),
        "condition_names": condition_names,
        "mean_ranks": [round(float(r), decimals) for r in mean_ranks],
        "alpha": alpha,
        # Plain bool (not numpy.bool_) so the result serializes cleanly.
        "significant": bool(pvalue < alpha),
        "stars": p2stars(pvalue),
        "H0": "All conditions have the same distribution",
    }

    # Log results if verbose
    if verbose:
        logger.info(
            f"Friedman: χ² = {statistic:.3f}, p = {pvalue:.4f} {p2stars(pvalue)}"
        )
        logger.info(f"Kendall's W = {W:.3f} ({W_interpretation})")

    # Auto-enable plotting if ax is provided
    if ax is not None:
        plot = True

    # Generate plot if requested
    if plot:
        if ax is None:
            _fig, ax = _mpl_plt.subplots()
        _plot_friedman(data_array, ranks, result, condition_names, ax)

    # Return based on format
    if return_as == "dataframe":
        return pd.DataFrame([result])
    return result
def _plot_friedman(data, ranks, result, condition_names, ax):
    """Draw the Friedman-test figure (violin + swarm) on the given axes.

    Parameters
    ----------
    data : array, shape (n_subjects, n_conditions)
        Wide-format observations; one column is plotted per condition.
    ranks : array
        Accepted for signature compatibility; not used by this function.
    result : dict
        Result dictionary; the keys ``significant``, ``stars``,
        ``statistic``, ``pvalue`` and ``kendall_w`` are read.
    condition_names : list of str
        Labels passed to the violin helper and used as x tick labels.
    ax : matplotlib.axes.Axes
        Axes to draw on; modified in place.
    """
    from scitex_stats._plot_helpers import (
        significance_bracket,
        stats_text_box,
        violin_swarm,
    )

    # Only the condition count is needed; the unpack also requires 2D data.
    _, n_conditions = data.shape
    per_condition = [data[:, col] for col in range(n_conditions)]

    violin_swarm(ax, per_condition, list(range(n_conditions)), condition_names)

    # A single bracket spanning the first and last condition marks a
    # significant omnibus result.
    if result["significant"]:
        significance_bracket(
            ax, 0, n_conditions - 1, result["stars"], per_condition
        )

    summary_lines = [
        fmt_stat("chi2", result["statistic"]),
        fmt_stat("p", result["pvalue"], fmt=".4f", stars=result["stars"]),
        fmt_stat("W", result["kendall_w"]),
    ]
    stats_text_box(ax, summary_lines)

    ax.set_xticklabels(condition_names, rotation=45, ha="right")
    ax.set_title("Friedman Test")
    ax.grid(True, alpha=0.3, axis="y")


# EOF