Source code for scitex_stats.auto._formatting

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Timestamp: "2025-12-10 (ywatanabe)"
# File: scitex_stats/auto/_formatting.py

"""
Statistical Formatting - Publication-ready output generation.

This module provides functions for:
- Computing summary statistics per group
- Formatting complete test result lines
- Converting results for Inspector panel display
- Handling multiple comparison corrections

All formatting respects journal style presets (APA, Nature, Cell, Elsevier).
"""

from __future__ import annotations

from dataclasses import dataclass
from typing import Any, Dict, List, Optional, TypedDict, Union

import numpy as np

from ._styles import StatStyle, get_stat_style
from ._summary import (
    SummaryStatsDict,
    compute_summary_from_groups,
    compute_summary_stats,
)
from ._symbols import get_stat_symbol

# =============================================================================
# Type Definitions
# =============================================================================


class SummaryStatsDict(TypedDict, total=False):
    """
    Summary statistics for a single group.

    Attributes
    ----------
    group : str
        Group name/label.
    n : int
        Sample size.
    mean : float or None
        Mean value.
    sd : float or None
        Standard deviation.
    sem : float or None
        Standard error of mean.
    median : float or None
        Median value.
    iqr : float or None
        Interquartile range.
    q1 : float or None
        First quartile (25th percentile).
    q3 : float or None
        Third quartile (75th percentile).
    minimum : float or None
        Minimum value.
    maximum : float or None
        Maximum value.
    """

    group: str
    n: int
    mean: Optional[float]
    sd: Optional[float]
    sem: Optional[float]
    median: Optional[float]
    iqr: Optional[float]
    q1: Optional[float]
    q3: Optional[float]
    minimum: Optional[float]
    maximum: Optional[float]


class TestResultDict(TypedDict, total=False):
    """
    Result structure for a single statistical test.

    Attributes
    ----------
    test_name : str
        Internal test name ("ttest_ind", "brunner_munzel", etc.).
    p_raw : float or None
        Raw p-value.
    p_adj : float or None
        Adjusted p-value after multiple correction.
    stat : float or None
        Test statistic value.
    df : float or None
        Degrees of freedom.
    method : str or None
        Human-readable method label.
    correction_method : str or None
        Multiple correction method used ("bonferroni", "fdr_bh", etc.).
    details : dict
        Additional test-specific information.
    """

    test_name: str
    p_raw: Optional[float]
    p_adj: Optional[float]
    stat: Optional[float]
    df: Optional[float]
    method: Optional[str]
    correction_method: Optional[str]
    details: Dict[str, Any]


class EffectResultDict(TypedDict, total=False):
    """
    Result structure for a single effect size measure.

    Attributes
    ----------
    name : str
        Internal name ("cohens_d_ind", "eta_squared", etc.).
    label : str
        Human-readable label.
    value : float
        Effect size value.
    ci_lower : float or None
        Lower bound of confidence interval.
    ci_upper : float or None
        Upper bound of confidence interval.
    note : str or None
        Interpretation note (e.g., "small", "medium", "large").
    """

    name: str
    label: str
    value: float
    ci_lower: Optional[float]
    ci_upper: Optional[float]
    note: Optional[str]


# =============================================================================
# =============================================================================
# Test Line Formatting
# =============================================================================


def format_test_line(
    test: TestResultDict,
    effects: Optional[List[EffectResultDict]] = None,
    summary: Optional[List[SummaryStatsDict]] = None,
    style: Optional[Union[str, StatStyle]] = None,
    include_n: bool = True,
    max_effects: int = 2,
) -> str:
    """
    Format a complete statistical result line.

    Produces publication-ready formatted text with proper italics,
    symbols, and formatting according to the specified journal style.

    Parameters
    ----------
    test : TestResultDict
        Test result dictionary.
    effects : list of EffectResultDict, optional
        Effect size results to include.
    summary : list of SummaryStatsDict, optional
        Summary statistics for sample size display.
    style : str or StatStyle, optional
        Style to use. Can be style ID or StatStyle instance.
        Defaults to APA LaTeX.
    include_n : bool
        Whether to include sample sizes in output.
    max_effects : int
        Maximum number of effect sizes to include.

    Returns
    -------
    str
        Formatted result line.

    Examples
    --------
    >>> test = {"test_name": "ttest_ind", "stat": 2.31, "df": 28.0, "p_raw": 0.028}
    >>> effects = [{"name": "cohens_d_ind", "value": 0.72, "label": "Cohen's d"}]
    >>> summary = [{"group": "A", "n": 15}, {"group": "B", "n": 15}]
    >>> line = format_test_line(test, effects, summary, style="apa_latex")
    >>> "\\mathit{t}" in line
    True
    """
    # Get style
    if style is None:
        style = get_stat_style("apa_latex")
    elif isinstance(style, str):
        style = get_stat_style(style)

    parts: List[str] = []

    # Format test statistic
    test_name = test.get("test_name", "")
    stat = test.get("stat")
    df = test.get("df")

    if stat is not None:
        symbol = get_stat_symbol(test_name)
        stat_part = style.format_stat(symbol, stat, df)
        parts.append(stat_part)

    # Format p-value
    p = test.get("p_adj") or test.get("p_raw")
    if p is not None:
        p_part = style.format_p(p)
        parts.append(p_part)

    # Format effect sizes
    if effects:
        for eff in effects[:max_effects]:
            eff_name = eff.get("name", "")
            eff_value = eff.get("value")
            if eff_value is not None:
                eff_part = style.format_effect(eff_name, eff_value)
                parts.append(eff_part)

    # Format sample sizes
    if include_n and summary:
        for s in summary:
            group_name = str(s.get("group", ""))
            n_value = int(s.get("n", 0))
            n_part = style.format_n(group_name, n_value)
            parts.append(n_part)

    return ", ".join(parts)


def format_test_line_compact(
    test: TestResultDict,
    style: Optional[Union[str, StatStyle]] = None,
) -> str:
    """
    Format a compact test result (statistic + p-value only).

    Parameters
    ----------
    test : TestResultDict
        Test result dictionary.
    style : str or StatStyle, optional
        Style to use.

    Returns
    -------
    str
        Compact formatted result.
    """
    return format_test_line(
        test,
        effects=None,
        summary=None,
        style=style,
        include_n=False,
    )


# =============================================================================
# Inspector Panel Formatting
# =============================================================================


def format_for_inspector(
    test_results: List[TestResultDict],
    effect_results: Optional[List[EffectResultDict]] = None,
) -> Dict[str, List[Dict]]:
    """
    Format results for Inspector panel display.

    Produces a structure suitable for displaying in a UI panel
    with tables for tests and effect sizes.

    Parameters
    ----------
    test_results : list of TestResultDict
        Test results to display.
    effect_results : list of EffectResultDict, optional
        Effect size results to display.

    Returns
    -------
    dict
        Dictionary with 'tests' and 'effects' lists.

    Examples
    --------
    >>> tests = [{"test_name": "ttest_ind", "p_raw": 0.03, "stat": 2.2}]
    >>> effects = [{"name": "cohens_d_ind", "value": 0.8, "label": "Cohen's d"}]
    >>> result = format_for_inspector(tests, effects)
    >>> len(result["tests"])
    1
    """
    from ._selector import _pretty_label

    return {
        "tests": [
            {
                "name": r.get("test_name"),
                "label": _pretty_label(r.get("test_name", "")),
                "p_raw": r.get("p_raw"),
                "p_adj": r.get("p_adj"),
                "stat": r.get("stat"),
                "df": r.get("df"),
                "method": r.get("method"),
                "correction": r.get("correction_method"),
                "details": r.get("details", {}),
            }
            for r in test_results
        ],
        "effects": [
            {
                "name": e.get("name"),
                "label": e.get("label", e.get("name", "")),
                "value": e.get("value"),
                "ci_lower": e.get("ci_lower"),
                "ci_upper": e.get("ci_upper"),
                "note": e.get("note"),
            }
            for e in (effect_results or [])
        ],
    }


# =============================================================================
# P-value to Stars
# =============================================================================


[docs] def p_to_stars( p_value: Optional[float], style: Optional[Union[str, StatStyle]] = None, ) -> str: """ Convert p-value to significance stars. Uses the alpha thresholds from the specified style. Parameters ---------- p_value : float or None P-value to convert. style : str or StatStyle, optional Style to use for thresholds. Returns ------- str Stars string ("***", "**", "*", or "ns"). Examples -------- >>> p_to_stars(0.001) '***' >>> p_to_stars(0.03) '*' >>> p_to_stars(0.10) 'ns' """ if style is None: style = get_stat_style("apa_latex") elif isinstance(style, str): style = get_stat_style(style) return style.p_to_stars(p_value)
# ============================================================================= # Multiple Comparison Correction # ============================================================================= CorrectionMethod = Union[str, None] def apply_multiple_correction( results: List[TestResultDict], method: CorrectionMethod = "fdr_bh", ) -> List[TestResultDict]: """ Apply multiple-comparison correction to test results. Modifies results in-place by setting p_adj and correction_method. Parameters ---------- results : list of TestResultDict Test results with p_raw values. method : str or None Correction method: - "none": No correction (p_adj = p_raw) - "bonferroni": Bonferroni correction - "holm": Holm-Bonferroni step-down - "fdr_bh": Benjamini-Hochberg FDR Returns ------- list of TestResultDict Results with p_adj filled in. Examples -------- >>> results = [ ... {"test_name": "t1", "p_raw": 0.01}, ... {"test_name": "t2", "p_raw": 0.03}, ... {"test_name": "t3", "p_raw": 0.04}, ... ] >>> corrected = apply_multiple_correction(results, "bonferroni") >>> corrected[0]["p_adj"] 0.03 """ if method is None or method == "none": for r in results: r["p_adj"] = r.get("p_raw") r["correction_method"] = "none" return results # Get valid p-values valid_indices = [i for i, r in enumerate(results) if r.get("p_raw") is not None] if not valid_indices: return results p_values = [results[i]["p_raw"] for i in valid_indices] m = len(p_values) adjusted: List[float] = [] if method == "bonferroni": adjusted = [min(p * m, 1.0) for p in p_values] elif method == "holm": # Holm-Bonferroni step-down sorted_idx = sorted(range(m), key=lambda i: p_values[i]) adj = [0.0] * m cummax = 0.0 for rank, idx in enumerate(sorted_idx, start=1): adj_val = min((m - rank + 1) * p_values[idx], 1.0) adj_val = max(adj_val, cummax) # Enforce monotonicity adj[idx] = adj_val cummax = adj_val adjusted = adj elif method == "fdr_bh": # Benjamini-Hochberg sorted_idx = sorted(range(m), key=lambda i: p_values[i]) adj = [0.0] * m prev = 1.0 for rank in range(m, 0, -1): idx = sorted_idx[rank - 1] p = p_values[idx] bh = p * m / rank val = min(bh, prev, 1.0) adj[idx] = val prev = val adjusted = adj else: # Unknown method - no correction adjusted = p_values # Write back for local_i, global_i in enumerate(valid_indices): results[global_i]["p_adj"] = float(adjusted[local_i]) results[global_i]["correction_method"] = method return results # ============================================================================= # Public API # ============================================================================= __all__ = [ # Type definitions "SummaryStatsDict", "TestResultDict", "EffectResultDict", "CorrectionMethod", # Summary statistics "compute_summary_stats", "compute_summary_from_groups", # Symbol mapping "get_stat_symbol", # Formatting "format_test_line", "format_test_line_compact", "format_for_inspector", "p_to_stars", # Correction "apply_multiple_correction", ] # EOF