Source code for scitex_stats.tests.nonparametric._test_kruskal

#!/usr/bin/env python3
# Timestamp: "2025-10-01 18:14:34 (ywatanabe)"
# File: scitex_stats/tests/nonparametric/_test_kruskal.py

r"""
Kruskal-Wallis H test for independent samples.

Functionalities:
  - Perform Kruskal-Wallis H test for independent samples
  - Non-parametric alternative to one-way ANOVA
  - Compute epsilon-squared effect size
  - Generate box plots with significance annotations
  - Support flexible output formats (dict or DataFrame)

Dependencies:
  - packages: numpy, pandas, scipy, matplotlib

IO:
  - input: Multiple independent samples (arrays or Series)
  - output: Test results (dict or DataFrame) and optional figure
"""

from __future__ import annotations

import argparse
import os
from typing import List, Literal, Optional, Union

import matplotlib
import matplotlib.axes
import numpy as np
import pandas as pd
from scipy import stats

import scitex as stx
from scitex_stats._logging import getLogger
from scitex_stats._utils._formatters import fmt_stat

__FILE__ = __file__
__DIR__ = os.path.dirname(__FILE__)

logger = getLogger(__name__)


[docs] def test_kruskal( # noqa: C901 groups: Optional[List[Union[np.ndarray, pd.Series]]] = None, var_names: Optional[List[str]] = None, alpha: float = 0.05, plot: bool = False, ax: Optional[matplotlib.axes.Axes] = None, data: Union[pd.DataFrame, str, None] = None, value_col: Optional[str] = None, group_col: Optional[str] = None, return_as: Literal["dict", "dataframe"] = "dict", decimals: int = 3, verbose: bool = False, ) -> Union[dict, pd.DataFrame]: r""" Perform Kruskal-Wallis H test for independent samples. Parameters ---------- groups : list of arrays List of sample arrays for each group (minimum 2 groups) var_names : list of str, optional Names for each group. If None, uses 'Group 1', 'Group 2', etc. alpha : float, default 0.05 Significance level plot : bool, default False Whether to generate box plots ax : matplotlib.axes.Axes, optional Axes object to plot on. If None and plot=True, creates new figure. If provided, automatically enables plotting. data : DataFrame, str, or None, optional DataFrame or CSV path. When provided with value_col and group_col, groups are extracted automatically (seaborn-style). value_col : str, optional Column containing measurement values (used with data=). group_col : str, optional Column containing group labels (used with data=). return_as : {'dict', 'dataframe'}, default 'dict' Output format decimals : int, default 3 Number of decimal places for rounding verbose : bool, default False Whether to print test results Returns ------- results : dict or DataFrame Test results including: - test_method: 'Kruskal-Wallis H test' - statistic: H-statistic value - pvalue: p-value - stars: Significance stars - significant: Whether null hypothesis is rejected - effect_size: Epsilon-squared (ε²) - effect_size_metric: 'epsilon-squared' - effect_size_interpretation: Interpretation of epsilon-squared - n_groups: Number of groups - n_samples: Sample sizes for each group - var_names: Group labels - H0: Null hypothesis description Notes ----- The Kruskal-Wallis H test is a non-parametric alternative to one-way ANOVA. It tests whether samples originate from the same distribution by comparing the ranks of observations across groups. **Null Hypothesis (H0)**: All groups have the same population median (more precisely: all groups have identical distribution functions) **Assumptions**: - Independent observations within and between groups - Ordinal or continuous data - Similar distribution shapes across groups (for median interpretation) **Advantages over ANOVA**: - No normality assumption required - Robust to outliers - Works with ordinal data - More powerful than ANOVA for heavy-tailed distributions **When to use**: - Comparing 3+ independent groups - Data violate normality (use test_shapiro to check) - Presence of outliers - Ordinal data (e.g., Likert scales) **Test Statistic H**: .. math:: H = \frac{12}{N(N+1)} \sum_{i=1}^{k} \frac{R_i^2}{n_i} - 3(N+1) Where: - k: Number of groups - N: Total sample size - R_i: Sum of ranks for group i - n_i: Sample size of group i **Effect Size (Epsilon-squared)**: .. math:: \epsilon^2 = \frac{H - k + 1}{N - k} Interpretation (similar to eta-squared): - ε² < 0.01: negligible - ε² < 0.06: small - ε² < 0.14: medium - ε² ≥ 0.14: large **Post-hoc tests**: If significant, use pairwise comparisons with correction: - test_brunner_munzel() for all pairs - correct_bonferroni() or correct_fdr() for multiple comparisons **Tied ranks**: Handled automatically by scipy.stats.kruskal() References ---------- .. [1] Kruskal, W. H., & Wallis, W. A. (1952). "Use of ranks in one-criterion variance analysis". Journal of the American Statistical Association, 47(260), 583-621. .. [2] Hecke, T. V. (2012). "Power study of ANOVA versus Kruskal-Wallis test". Journal of Statistics and Management Systems, 15(2-3), 241-247. .. [3] Tomczak, M., & Tomczak, E. (2014). "The need to report effect size estimates revisited". Trends in Sport Sciences, 21(1), 19-25. Examples -------- >>> # Three groups with different medians >>> group1 = np.array([1, 2, 3, 4, 5]) >>> group2 = np.array([3, 4, 5, 6, 7]) >>> group3 = np.array([5, 6, 7, 8, 9]) >>> result = test_kruskal([group1, group2, group3]) >>> result['rejected'] True >>> # With custom names and plot >>> result, fig = test_kruskal( ... [group1, group2, group3], ... var_names=['Control', 'Treatment 1', 'Treatment 2'], ... plot=True ... ) >>> # Export results >>> from scitex_stats.utils._normalizers import convert_results >>> convert_results(result, return_as='excel', path='kruskal_results.xlsx') """ from scitex_stats._utils._effect_size import ( epsilon_squared, interpret_epsilon_squared, ) from scitex_stats._utils._formatters import p2stars from scitex_stats._utils._normalizers import convert_results, force_dataframe # Resolve groups from DataFrame (seaborn-style data= parameter) if data is not None and value_col is not None and group_col is not None: from scitex_stats._utils._csv_support import resolve_groups groups, group_names = resolve_groups(data, value_col, group_col) if var_names is None: var_names = group_names # Validate input if len(groups) < 2: raise ValueError("Kruskal-Wallis test requires at least 2 groups") # Convert to numpy arrays and remove NaN groups = [np.asarray(g) for g in groups] groups = [g[~np.isnan(g)] for g in groups] # Check if all groups have at least 5 observations (recommended) for i, g in enumerate(groups): if len(g) < 5: logger.warning( f"Group {i + 1} has only {len(g)} observations. " "Kruskal-Wallis test is most reliable with n ≥ 5 per group." ) # Generate default names if not provided if var_names is None: var_names = [f"Group {i + 1}" for i in range(len(groups))] if len(var_names) != len(groups): raise ValueError("Number of var_names must match number of groups") # Get sample sizes n_samples = [len(g) for g in groups] n_groups = len(groups) # Perform Kruskal-Wallis test h_result = stats.kruskal(*groups) h_stat = float(h_result.statistic) pvalue = float(h_result.pvalue) # Determine rejection rejected = pvalue < alpha # Compute effect size (epsilon-squared) effect_size = epsilon_squared(groups) effect_size_interp = interpret_epsilon_squared(effect_size) # Compile results result = { "test_method": "Kruskal-Wallis H test", "statistic": round(h_stat, decimals), "stat_symbol": "H", "n_groups": n_groups, "n_samples": n_samples, "var_names": var_names, "pvalue": round(pvalue, decimals), "stars": p2stars(pvalue), "alpha": alpha, "significant": rejected, "effect_size": round(effect_size, decimals), "effect_size_metric": "epsilon-squared", "effect_size_interpretation": effect_size_interp, "H0": "All groups have the same population median", } # Add post-hoc recommendation if significant if rejected: result["recommendation"] = ( "Significant difference detected. Perform post-hoc pairwise comparisons " "with test_brunner_munzel() and apply correction (correct_bonferroni or correct_fdr)." ) else: result["recommendation"] = "No significant difference between groups." # Log results if verbose if verbose: logger.info( f"Kruskal-Wallis: H = {h_stat:.3f}, p = {pvalue:.4f} {p2stars(pvalue)}" ) logger.info(f"Epsilon-squared = {effect_size:.3f} ({effect_size_interp})") # Auto-enable plotting if ax is provided if ax is not None: plot = True # Generate plot if requested if plot: if ax is None: _fig, ax = stx.plt.subplots() _plot_kruskal(groups, var_names, result, ax) # Convert to requested format if return_as == "dataframe": result = force_dataframe(result) elif return_as not in ["dict", "dataframe"]: # Use universal converter for other formats return convert_results(result, return_as=return_as) return result
def _plot_kruskal(groups, var_names, result, ax): """Create violin+swarm visualization on given axes.""" from scitex_stats._plot_helpers import ( significance_bracket, stats_text_box, violin_swarm, ) n_groups = len(groups) positions = list(range(n_groups)) violin_swarm(ax, groups, positions, var_names) if result["significant"]: significance_bracket(ax, 0, n_groups - 1, result["stars"], groups) stats_text_box( ax, [ fmt_stat("H", result["statistic"]), fmt_stat("p", result["pvalue"], fmt=".4f", stars=result["stars"]), fmt_stat("epsilon2", result["effect_size"]), ], ) ax.set_title("Kruskal-Wallis Test") def main(args): # noqa: C901 """Demonstrate Kruskal-Wallis test functionality.""" logger.info("Demonstrating Kruskal-Wallis H test") # Set random seed np.random.seed(42) # Example 1: Three groups with clear differences logger.info("\n=== Example 1: Three groups with clear differences ===") group1 = np.random.normal(5, 1, 30) group2 = np.random.normal(7, 1, 30) group3 = np.random.normal(9, 1, 30) result1 = test_kruskal( [group1, group2, group3], var_names=["Group A", "Group B", "Group C"], verbose=True, ) # Example 2: No significant difference logger.info("\n=== Example 2: No significant difference ===") group1 = np.random.normal(5, 1, 30) group2 = np.random.normal(5.2, 1, 30) group3 = np.random.normal(4.9, 1, 30) result2 = test_kruskal( [group1, group2, group3], var_names=["Control", "Treatment 1", "Treatment 2"], verbose=True, ) # Example 3: Non-normal data with outliers (with visualization) logger.info("\n=== Example 3: Non-normal data with outliers ===") group1 = np.concatenate([np.random.exponential(2, 25), [20, 22]]) # Outliers group2 = np.random.exponential(3, 27) group3 = np.random.exponential(4, 28) result3 = test_kruskal( [group1, group2, group3], var_names=["Exponential 1", "Exponential 2", "Exponential 3"], plot=True, verbose=True, ) stx.io.save(stx.plt.gcf(), "./kruskal_example3.jpg") stx.plt.close() # Example 4: Four groups comparison logger.info("\n=== Example 4: Four groups comparison ===") group1 = np.random.normal(10, 2, 25) group2 = np.random.normal(12, 2, 25) group3 = np.random.normal(14, 2, 25) group4 = np.random.normal(16, 2, 25) result4 = test_kruskal( [group1, group2, group3, group4], var_names=["Dose 0", "Dose 1", "Dose 2", "Dose 3"], verbose=True, ) # Example 5: Ordinal data (Likert scale) logger.info("\n=== Example 5: Ordinal data (Likert scale responses) ===") # Simulated Likert scale data (1-5) likert1 = np.random.choice( [1, 2, 3, 4, 5], size=50, p=[0.05, 0.15, 0.40, 0.30, 0.10] ) likert2 = np.random.choice( [1, 2, 3, 4, 5], size=50, p=[0.10, 0.20, 0.30, 0.25, 0.15] ) likert3 = np.random.choice( [1, 2, 3, 4, 5], size=50, p=[0.05, 0.10, 0.25, 0.35, 0.25] ) result5 = test_kruskal( [likert1, likert2, likert3], var_names=["Condition A", "Condition B", "Condition C"], verbose=True, ) logger.info( f"Medians: {np.median(likert1):.1f}, {np.median(likert2):.1f}, {np.median(likert3):.1f}" ) # Example 6: Post-hoc pairwise comparisons logger.info("\n=== Example 6: Post-hoc pairwise comparisons ===") from ...correct._correct_bonferroni import correct_bonferroni from ._test_brunner_munzel import test_brunner_munzel # Use data from Example 1 groups = [group1, group2, group3] names = ["Group A", "Group B", "Group C"] # Perform overall test overall = test_kruskal(groups, var_names=names) if overall["significant"]: logger.info( "Overall test significant. Performing post-hoc pairwise comparisons..." ) # Pairwise comparisons pairwise_results = [] for i in range(len(groups)): for j in range(i + 1, len(groups)): result = test_brunner_munzel( groups[i], groups[j], var_x=names[i], var_y=names[j] ) pairwise_results.append(result) logger.info( f"{names[i]} vs {names[j]}: " f"p = {result['pvalue']:.4f} {result['stars']}" ) # Apply Bonferroni correction corrected = correct_bonferroni(pairwise_results) logger.info("\nAfter Bonferroni correction:") for res in corrected: logger.info( f"{res['var_x']} vs {res['var_y']}: " # type: ignore[index] f"p_adjusted = {res['pvalue_adjusted']:.4f}, " # type: ignore[index] f"significant = {res['significant']}" # type: ignore[index] ) # Example 7: Export results logger.info("\n=== Example 7: Export results to multiple formats ===") from scitex_stats._utils._normalizers import convert_results, force_dataframe # Create multiple test results test_results = [result1, result2, result3, result4, result5] # Export to DataFrame df = force_dataframe(test_results) logger.info(f"\nDataFrame shape: {df.shape}") logger.info(f"Columns: {df.columns.tolist()}") # Export to Excel with styling convert_results(test_results, return_as="excel", path="./kruskal_tests.xlsx") # type: ignore[arg-type] logger.info("Results exported to Excel") # Export to CSV convert_results(test_results, return_as="csv", path="./kruskal_tests.csv") # type: ignore[arg-type] logger.info("Results exported to CSV") return 0 def parse_args(): """Parse command line arguments.""" parser = argparse.ArgumentParser(description="Demonstrate Kruskal-Wallis H test") parser.add_argument("--verbose", action="store_true", help="Enable verbose output") return parser.parse_args() def run_main(): """Initialize SciTeX framework and run main.""" import sys import matplotlib.pyplot as plt args = parse_args() CONFIG, sys.stdout, sys.stderr, plt, _CC, _rng_manager = stx.session.start( sys, plt, args=args, file=__file__, verbose=args.verbose, agg=True, ) exit_status = main(args) stx.session.close( CONFIG, verbose=args.verbose, exit_status=exit_status, ) if __name__ == "__main__": run_main() # EOF