Source code for scitex_stats.tests.nonparametric._test_kruskal

#!/usr/bin/env python3
# Timestamp: "2025-10-01 18:14:34 (ywatanabe)"
# File: scitex_stats/tests/nonparametric/_test_kruskal.py

r"""
Kruskal-Wallis H test for independent samples.

Functionalities:
  - Perform Kruskal-Wallis H test for independent samples
  - Non-parametric alternative to one-way ANOVA
  - Compute epsilon-squared effect size
  - Generate box plots with significance annotations
  - Support flexible output formats (dict or DataFrame)

Dependencies:
  - packages: numpy, pandas, scipy, matplotlib

IO:
  - input: Multiple independent samples (arrays or Series)
  - output: Test results (dict or DataFrame) and optional figure
"""

from __future__ import annotations

import argparse
import os
from typing import List, Literal, Optional, Union

import matplotlib
import matplotlib.axes
import matplotlib.pyplot as _mpl_plt  # noqa: E402
import numpy as np
import pandas as pd
from scipy import stats

from scitex_stats._logging import getLogger
from scitex_stats._utils._formatters import fmt_stat

__FILE__ = __file__
__DIR__ = os.path.dirname(__FILE__)

logger = getLogger(__name__)


[docs] def test_kruskal( # noqa: C901 groups: Optional[List[Union[np.ndarray, pd.Series]]] = None, var_names: Optional[List[str]] = None, alpha: float = 0.05, plot: bool = False, ax: Optional[matplotlib.axes.Axes] = None, data: Union[pd.DataFrame, str, None] = None, value_col: Optional[str] = None, group_col: Optional[str] = None, return_as: Literal["dict", "dataframe"] = "dict", decimals: int = 3, verbose: bool = False, ) -> Union[dict, pd.DataFrame]: r""" Perform Kruskal-Wallis H test for independent samples. Parameters ---------- groups : list of arrays List of sample arrays for each group (minimum 2 groups) var_names : list of str, optional Names for each group. If None, uses 'Group 1', 'Group 2', etc. alpha : float, default 0.05 Significance level plot : bool, default False Whether to generate box plots ax : matplotlib.axes.Axes, optional Axes object to plot on. If None and plot=True, creates new figure. If provided, automatically enables plotting. data : DataFrame, str, or None, optional DataFrame or CSV path. When provided with value_col and group_col, groups are extracted automatically (seaborn-style). value_col : str, optional Column containing measurement values (used with data=). group_col : str, optional Column containing group labels (used with data=). return_as : {'dict', 'dataframe'}, default 'dict' Output format decimals : int, default 3 Number of decimal places for rounding verbose : bool, default False Whether to print test results Returns ------- results : dict or DataFrame Test results including: - test_method: 'Kruskal-Wallis H test' - statistic: H-statistic value - pvalue: p-value - stars: Significance stars - significant: Whether null hypothesis is rejected - effect_size: Epsilon-squared (ε²) - effect_size_metric: 'epsilon-squared' - effect_size_interpretation: Interpretation of epsilon-squared - n_groups: Number of groups - n_samples: Sample sizes for each group - var_names: Group labels - H0: Null hypothesis description Notes ----- The Kruskal-Wallis H test is a non-parametric alternative to one-way ANOVA. It tests whether samples originate from the same distribution by comparing the ranks of observations across groups. **Null Hypothesis (H0)**: All groups have the same population median (more precisely: all groups have identical distribution functions) **Assumptions**: - Independent observations within and between groups - Ordinal or continuous data - Similar distribution shapes across groups (for median interpretation) **Advantages over ANOVA**: - No normality assumption required - Robust to outliers - Works with ordinal data - More powerful than ANOVA for heavy-tailed distributions **When to use**: - Comparing 3+ independent groups - Data violate normality (use test_shapiro to check) - Presence of outliers - Ordinal data (e.g., Likert scales) **Test Statistic H**: .. math:: H = \frac{12}{N(N+1)} \sum_{i=1}^{k} \frac{R_i^2}{n_i} - 3(N+1) Where: - k: Number of groups - N: Total sample size - R_i: Sum of ranks for group i - n_i: Sample size of group i **Effect Size (Epsilon-squared)**: .. math:: \epsilon^2 = \frac{H - k + 1}{N - k} Interpretation (similar to eta-squared): - ε² < 0.01: negligible - ε² < 0.06: small - ε² < 0.14: medium - ε² ≥ 0.14: large **Post-hoc tests**: If significant, use pairwise comparisons with correction: - test_brunner_munzel() for all pairs - correct_bonferroni() or correct_fdr() for multiple comparisons **Tied ranks**: Handled automatically by scipy.stats.kruskal() References ---------- .. [1] Kruskal, W. H., & Wallis, W. A. (1952). "Use of ranks in one-criterion variance analysis". Journal of the American Statistical Association, 47(260), 583-621. .. [2] Hecke, T. V. (2012). "Power study of ANOVA versus Kruskal-Wallis test". Journal of Statistics and Management Systems, 15(2-3), 241-247. .. [3] Tomczak, M., & Tomczak, E. (2014). "The need to report effect size estimates revisited". Trends in Sport Sciences, 21(1), 19-25. Examples -------- >>> # Three groups with different medians >>> group1 = np.array([1, 2, 3, 4, 5]) >>> group2 = np.array([3, 4, 5, 6, 7]) >>> group3 = np.array([5, 6, 7, 8, 9]) >>> result = test_kruskal([group1, group2, group3]) >>> result['rejected'] True >>> # With custom names and plot >>> result, fig = test_kruskal( ... [group1, group2, group3], ... var_names=['Control', 'Treatment 1', 'Treatment 2'], ... plot=True ... ) >>> # Export results >>> from scitex_stats.utils._normalizers import convert_results >>> convert_results(result, return_as='excel', path='kruskal_results.xlsx') """ from scitex_stats._utils._effect_size import ( epsilon_squared, interpret_epsilon_squared, ) from scitex_stats._utils._formatters import p2stars from scitex_stats._utils._normalizers import convert_results, force_dataframe # Resolve groups from DataFrame (seaborn-style data= parameter) if data is not None and value_col is not None and group_col is not None: from scitex_stats._utils._csv_support import resolve_groups groups, group_names = resolve_groups(data, value_col, group_col) if var_names is None: var_names = group_names # Validate input if len(groups) < 2: raise ValueError("Kruskal-Wallis test requires at least 2 groups") # Convert to numpy arrays and remove NaN groups = [np.asarray(g) for g in groups] groups = [g[~np.isnan(g)] for g in groups] # Check if all groups have at least 5 observations (recommended) for i, g in enumerate(groups): if len(g) < 5: logger.warning( f"Group {i + 1} has only {len(g)} observations. " "Kruskal-Wallis test is most reliable with n ≥ 5 per group." ) # Generate default names if not provided if var_names is None: var_names = [f"Group {i + 1}" for i in range(len(groups))] if len(var_names) != len(groups): raise ValueError("Number of var_names must match number of groups") # Get sample sizes n_samples = [len(g) for g in groups] n_groups = len(groups) # Perform Kruskal-Wallis test h_result = stats.kruskal(*groups) h_stat = float(h_result.statistic) pvalue = float(h_result.pvalue) # Determine rejection rejected = pvalue < alpha # Compute effect size (epsilon-squared) effect_size = epsilon_squared(groups) effect_size_interp = interpret_epsilon_squared(effect_size) # Compile results result = { "test_method": "Kruskal-Wallis H test", "statistic": round(h_stat, decimals), "stat_symbol": "H", "n_groups": n_groups, "n_samples": n_samples, "var_names": var_names, "pvalue": round(pvalue, decimals), "stars": p2stars(pvalue), "alpha": alpha, "significant": rejected, "effect_size": round(effect_size, decimals), "effect_size_metric": "epsilon-squared", "effect_size_interpretation": effect_size_interp, "H0": "All groups have the same population median", } # Add post-hoc recommendation if significant if rejected: result["recommendation"] = ( "Significant difference detected. Perform post-hoc pairwise comparisons " "with test_brunner_munzel() and apply correction (correct_bonferroni or correct_fdr)." ) else: result["recommendation"] = "No significant difference between groups." # Log results if verbose if verbose: logger.info( f"Kruskal-Wallis: H = {h_stat:.3f}, p = {pvalue:.4f} {p2stars(pvalue)}" ) logger.info(f"Epsilon-squared = {effect_size:.3f} ({effect_size_interp})") # Auto-enable plotting if ax is provided if ax is not None: plot = True # Generate plot if requested if plot: if ax is None: _fig, ax = _mpl_plt.subplots() _plot_kruskal(groups, var_names, result, ax) # Convert to requested format if return_as == "dataframe": result = force_dataframe(result) elif return_as not in ["dict", "dataframe"]: # Use universal converter for other formats return convert_results(result, return_as=return_as) return result
def _plot_kruskal(groups, var_names, result, ax): """Create violin+swarm visualization on given axes.""" from scitex_stats._plot_helpers import ( significance_bracket, stats_text_box, violin_swarm, ) n_groups = len(groups) positions = list(range(n_groups)) violin_swarm(ax, groups, positions, var_names) if result["significant"]: significance_bracket(ax, 0, n_groups - 1, result["stars"], groups) stats_text_box( ax, [ fmt_stat("H", result["statistic"]), fmt_stat("p", result["pvalue"], fmt=".4f", stars=result["stars"]), fmt_stat("epsilon2", result["effect_size"]), ], ) ax.set_title("Kruskal-Wallis Test") def main(args): # noqa: C901 """Demonstrate Kruskal-Wallis test functionality.""" import scitex as stx logger.info("Demonstrating Kruskal-Wallis H test") # Set random seed np.random.seed(42) # Example 1: Three groups with clear differences logger.info("\n=== Example 1: Three groups with clear differences ===") group1 = np.random.normal(5, 1, 30) group2 = np.random.normal(7, 1, 30) group3 = np.random.normal(9, 1, 30) result1 = test_kruskal( [group1, group2, group3], var_names=["Group A", "Group B", "Group C"], verbose=True, ) # Example 2: No significant difference logger.info("\n=== Example 2: No significant difference ===") group1 = np.random.normal(5, 1, 30) group2 = np.random.normal(5.2, 1, 30) group3 = np.random.normal(4.9, 1, 30) result2 = test_kruskal( [group1, group2, group3], var_names=["Control", "Treatment 1", "Treatment 2"], verbose=True, ) # Example 3: Non-normal data with outliers (with visualization) logger.info("\n=== Example 3: Non-normal data with outliers ===") group1 = np.concatenate([np.random.exponential(2, 25), [20, 22]]) # Outliers group2 = np.random.exponential(3, 27) group3 = np.random.exponential(4, 28) result3 = test_kruskal( [group1, group2, group3], var_names=["Exponential 1", "Exponential 2", "Exponential 3"], plot=True, verbose=True, ) stx.io.save(_mpl_plt.gcf(), "./kruskal_example3.jpg") _mpl_plt.close() # Example 4: Four groups comparison logger.info("\n=== Example 4: Four groups comparison ===") group1 = np.random.normal(10, 2, 25) group2 = np.random.normal(12, 2, 25) group3 = np.random.normal(14, 2, 25) group4 = np.random.normal(16, 2, 25) result4 = test_kruskal( [group1, group2, group3, group4], var_names=["Dose 0", "Dose 1", "Dose 2", "Dose 3"], verbose=True, ) # Example 5: Ordinal data (Likert scale) logger.info("\n=== Example 5: Ordinal data (Likert scale responses) ===") # Simulated Likert scale data (1-5) likert1 = np.random.choice( [1, 2, 3, 4, 5], size=50, p=[0.05, 0.15, 0.40, 0.30, 0.10] ) likert2 = np.random.choice( [1, 2, 3, 4, 5], size=50, p=[0.10, 0.20, 0.30, 0.25, 0.15] ) likert3 = np.random.choice( [1, 2, 3, 4, 5], size=50, p=[0.05, 0.10, 0.25, 0.35, 0.25] ) result5 = test_kruskal( [likert1, likert2, likert3], var_names=["Condition A", "Condition B", "Condition C"], verbose=True, ) logger.info( f"Medians: {np.median(likert1):.1f}, {np.median(likert2):.1f}, {np.median(likert3):.1f}" ) # Example 6: Post-hoc pairwise comparisons logger.info("\n=== Example 6: Post-hoc pairwise comparisons ===") from ...correct._correct_bonferroni import correct_bonferroni from ._test_brunner_munzel import test_brunner_munzel # Use data from Example 1 groups = [group1, group2, group3] names = ["Group A", "Group B", "Group C"] # Perform overall test overall = test_kruskal(groups, var_names=names) if overall["significant"]: logger.info( "Overall test significant. Performing post-hoc pairwise comparisons..." ) # Pairwise comparisons pairwise_results = [] for i in range(len(groups)): for j in range(i + 1, len(groups)): result = test_brunner_munzel( groups[i], groups[j], var_x=names[i], var_y=names[j] ) pairwise_results.append(result) logger.info( f"{names[i]} vs {names[j]}: " f"p = {result['pvalue']:.4f} {result['stars']}" ) # Apply Bonferroni correction corrected = correct_bonferroni(pairwise_results) logger.info("\nAfter Bonferroni correction:") for res in corrected: logger.info( f"{res['var_x']} vs {res['var_y']}: " # type: ignore[index] f"p_adjusted = {res['pvalue_adjusted']:.4f}, " # type: ignore[index] f"significant = {res['significant']}" # type: ignore[index] ) # Example 7: Export results logger.info("\n=== Example 7: Export results to multiple formats ===") from scitex_stats._utils._normalizers import force_dataframe # Create multiple test results test_results = [result1, result2, result3, result4, result5] # Export to DataFrame df = force_dataframe(test_results) logger.info(f"\nDataFrame shape: {df.shape}") logger.info(f"Columns: {df.columns.tolist()}") # Export via pandas — convert_results doesn't emit Excel/CSV. df.to_excel("./kruskal_tests.xlsx", index=False) logger.info("Results exported to Excel") df.to_csv("./kruskal_tests.csv", index=False) logger.info("Results exported to CSV") return 0 def parse_args(): """Parse command line arguments.""" parser = argparse.ArgumentParser(description="Demonstrate Kruskal-Wallis H test") parser.add_argument("--verbose", action="store_true", help="Enable verbose output") return parser.parse_args() def run_main(): """Initialize SciTeX framework and run main.""" import sys import matplotlib.pyplot as plt import scitex as stx args = parse_args() CONFIG, sys.stdout, sys.stderr, plt, _CC, _rng_manager = stx.session.start( sys, plt, args=args, file=__file__, verbose=args.verbose, agg=True, ) exit_status = main(args) stx.session.close( CONFIG, verbose=args.verbose, exit_status=exit_status, ) if __name__ == "__main__": run_main() # EOF