Source code for scitex_stats._bundle_io

#!/usr/bin/env python3
# File: src/scitex_stats/_bundle_io.py
"""scitex_stats ↔ SciTeX bundle I/O.

Convert ``run_test``-shaped result dicts into the canonical
``scitex_stats._dataclasses.Stats`` schema, and persist / load them as
SciTeX bundles (``kind="stats"``) via :mod:`scitex_io.bundle`.

``scitex-io`` is an OPTIONAL dependency (extra ``[bundle]``). This module
imports cleanly without it; the three public functions raise a clear
``ImportError`` when called in an environment that lacks ``scitex-io``::

    pip install scitex-stats[bundle]   # or: uv pip install 'scitex-stats[bundle]'

The statistical dataclasses (``Stats``, ``Analysis``, ``StatMethod``,
``StatResult``, ``EffectSize``) live here in ``scitex_stats`` as the single
source of truth; ``scitex_io.bundle`` imports them from this package. Hence
``test_result_to_stats`` only needs scitex-io for API consistency with the
bundle round-trip, not for the conversion itself.
"""

from __future__ import annotations

import uuid
from pathlib import Path
from typing import Any, Dict, List, Optional, Union

# ---------------------------------------------------------------------------
# Optional scitex-io.bundle import. The Bundle class is the only piece that
# genuinely requires scitex-io; the Stats schema itself is local (below).
# ---------------------------------------------------------------------------
try:
    from scitex_io.bundle import Bundle as _Bundle  # type: ignore[import-not-found]

    BUNDLE_AVAILABLE = True
except ImportError:  # pragma: no cover - exercised via sys.modules shim test
    # scitex-io is absent: keep this module importable and record that bundle
    # I/O is unavailable so the public functions can fail with a clear error.
    _Bundle = None  # type: ignore[assignment]
    BUNDLE_AVAILABLE = False

# Message carried by the ImportError that _require_bundle() raises when
# BUNDLE_AVAILABLE is False.
_MISSING_MSG = (
    "scitex-io is required for stats bundle I/O; install scitex-stats[bundle]"
)


def _require_bundle() -> None:
    """Raise ``ImportError`` unless the optional scitex-io import succeeded.

    Raises
    ------
    ImportError
        If ``BUNDLE_AVAILABLE`` is false, with ``_MISSING_MSG`` as the message.
    """
    if BUNDLE_AVAILABLE:
        return
    raise ImportError(_MISSING_MSG)


def test_result_to_stats(result: Dict[str, Any]):
    """Convert a test-result dict to the canonical ``Stats`` schema.

    Keys whose value is explicitly ``None`` (``method``, ``results``,
    ``inputs``, ``ci95``, legacy ``p_value``, nested ``effect_size``) are
    treated the same as missing keys instead of raising.

    Parameters
    ----------
    result : dict
        Test result dictionary. Supports both formats:

        Legacy flat format (``"method"`` is a string)::

            {"name": "Control vs Treatment", "method": "t-test",
             "p_value": 0.003, "effect_size": 1.21, "ci95": [0.5, 1.8]}

        New nested format (from test functions; ``"method"`` is a dict)::

            {"method": {"name": "t-test", "variant": "independent"},
             "results": {"statistic": 2.5, "statistic_name": "t",
                         "p_value": 0.01}}

    Returns
    -------
    Stats
        Stats object holding a single ``Analysis`` with a freshly generated
        ``result_id``; suitable for bundle storage.

    Raises
    ------
    ImportError
        If scitex-io is not installed.
    """
    _require_bundle()

    from scitex_stats._dataclasses import (
        Analysis,
        EffectSize,
        StatMethod,
        StatResult,
        Stats,
    )

    # Explicit ``None`` checks (rather than ``or``) so that falsy-but-valid
    # values such as an empty method string keep their original handling.
    method_data = result.get("method")
    if method_data is None:
        method_data = {}

    if isinstance(method_data, str):
        # Legacy flat format: every field lives at the top level of ``result``.
        method = StatMethod(name=method_data, variant=None, parameters={})

        effect_size = None
        es_val = result.get("effect_size")
        if es_val is not None:
            ci = result.get("ci95")
            if ci is None:
                ci = []
            effect_size = EffectSize(
                name="d",
                value=float(es_val),
                ci_lower=ci[0] if len(ci) > 0 else None,
                ci_upper=ci[1] if len(ci) > 1 else None,
            )

        # A missing or None p-value falls back to 1.0 (not significant).
        p_value = result.get("p_value")
        if p_value is None:
            p_value = 1.0

        stat_result = StatResult(
            statistic=result.get("statistic", 0.0),
            statistic_name=result.get("statistic_name", ""),
            p_value=p_value,
            df=result.get("df"),
            effect_size=effect_size,
            # The legacy format carries no alpha; 0.05 is fixed here.
            significant=p_value < 0.05,
            alpha=0.05,
        )
        analysis_name = result.get("name", "comparison")
    else:
        # Nested format: ``method`` and ``results`` are sub-dicts.
        method = StatMethod(
            name=method_data.get("name", "unknown"),
            variant=method_data.get("variant"),
            parameters=method_data.get("parameters", {}),
        )

        results_data = result.get("results")
        if results_data is None:
            results_data = {}

        effect_size = None
        es = results_data.get("effect_size")
        if es is not None:
            effect_size = EffectSize(
                name=es.get("name", ""),
                value=es.get("value", 0.0),
                ci_lower=es.get("ci_lower"),
                ci_upper=es.get("ci_upper"),
            )

        stat_result = StatResult(
            statistic=results_data.get("statistic", 0.0),
            statistic_name=results_data.get("statistic_name", ""),
            p_value=results_data.get("p_value", 1.0),
            df=results_data.get("df"),
            effect_size=effect_size,
            # Passed through as supplied; stays None when the producer
            # did not record a significance decision.
            significant=results_data.get("significant"),
            alpha=results_data.get("alpha", 0.05),
        )
        analysis_name = result.get("name", "analysis")

    # Copy so the caller's ``inputs`` dict is never mutated.
    inputs = dict(result.get("inputs") or {})
    inputs["comparison_name"] = analysis_name

    analysis = Analysis(
        result_id=str(uuid.uuid4()),
        method=method,
        results=stat_result,
        inputs=inputs,
    )

    return Stats(analyses=[analysis])
def save_stats(
    comparisons: Union[List[Dict[str, Any]], Any],
    path: Union[str, Path],
    metadata: Optional[Dict[str, Any]] = None,
    as_zip: bool = False,
) -> Path:
    """Save statistical results as a SciTeX bundle (``kind="stats"``).

    Parameters
    ----------
    comparisons : list or tuple of (dict or Stats), or Stats
        Sequence of comparison results. Each dict is converted via
        :func:`test_result_to_stats`; each ``Stats`` item contributes its
        analyses directly. Every item is checked, not only the first one.
        A single already-built ``Stats`` object is stored as is. An empty
        or falsy value leaves the bundle without stats.
    path : str or Path
        Output bundle path (directory, or ``.zip`` when ``as_zip``).
    metadata : dict, optional
        Currently unused placeholder kept for API stability.
    as_zip : bool, optional
        If True, save as a ZIP archive (``.zip`` suffix enforced; any
        existing suffix is replaced).

    Returns
    -------
    Path
        Path to the saved bundle.

    Raises
    ------
    ImportError
        If scitex-io is not installed.
    TypeError
        If a sequence item is neither a dict nor a ``Stats`` object.
    """
    _require_bundle()

    from scitex_stats._dataclasses import Stats

    p = Path(path)
    if as_zip and p.suffix != ".zip":
        p = p.with_suffix(".zip")

    bundle = _Bundle(p, create=True, kind="stats")

    if isinstance(comparisons, (list, tuple)):
        if comparisons:
            # Merge every item into one Stats object so that mixed or
            # tuple inputs are never stored as a raw sequence.
            stats = Stats(analyses=[])
            for index, comp in enumerate(comparisons):
                if isinstance(comp, Stats):
                    stats.analyses.extend(comp.analyses)
                elif isinstance(comp, dict):
                    stats.analyses.extend(test_result_to_stats(comp).analyses)
                else:
                    raise TypeError(
                        "save_stats expects dict or Stats items; "
                        f"got {type(comp).__name__} at index {index}"
                    )
            bundle.stats = stats
    elif comparisons:
        # Already-built Stats object: store unchanged.
        bundle.stats = comparisons

    # Stats bundles carry no renderable figure; validation reads payload from
    # disk before save writes it, so skip both render and validate here.
    bundle.save(validate=False, render=False)
    return p
def _significance_stars(p_value: float) -> str:
    """Return the significance marker for *p_value* (``***``/``**``/``*``/``ns``)."""
    if p_value < 0.001:
        return "***"
    if p_value < 0.01:
        return "**"
    if p_value < 0.05:
        return "*"
    return "ns"


def load_stats(path: Union[str, Path]) -> Dict[str, Any]:
    """Load a stats bundle into a flat, plot-friendly dict.

    Sections or values that are ``None`` in an analysis's ``to_dict()``
    output (``results``, ``inputs``, ``method``, ``effect_size``,
    ``p_value``) are handled like missing keys and fall back to the
    defaults below instead of raising.

    Parameters
    ----------
    path : str or Path
        Path to the bundle.

    Returns
    -------
    dict
        ``{"comparisons": [...], "metadata": {...}}`` where each comparison
        is a flat dict with keys ``name``, ``method``, ``p_value``
        (default 1.0), ``effect_size`` (default 0.0), ``ci95`` and
        ``formatted`` (``"***"``, ``"**"``, ``"*"`` or ``"ns"``).
        ``metadata`` is ``bundle.spec.to_dict()``, or ``{}`` without a spec.

    Raises
    ------
    ImportError
        If scitex-io is not installed.
    """
    _require_bundle()

    bundle = _Bundle(path)

    comparisons: List[Dict[str, Any]] = []
    if bundle.stats and bundle.stats.analyses:
        for analysis in bundle.stats.analyses:
            ad = analysis.to_dict()
            results = ad.get("results") or {}

            # A None p-value gets the same default as a missing one.
            p_val = results.get("p_value")
            if p_val is None:
                p_val = 1.0

            es_data = results.get("effect_size") or {}
            es_val = es_data.get("value", 0.0)

            # Only the bounds that are present are kept. NOTE: when only
            # the upper bound exists it ends up at index 0 of ``ci95``.
            bounds = [es_data.get("ci_lower"), es_data.get("ci_upper")]
            ci = [v for v in bounds if v is not None]

            comparisons.append(
                {
                    "name": (ad.get("inputs") or {}).get(
                        "comparison_name", "comparison"
                    ),
                    "method": (ad.get("method") or {}).get("name", "unknown"),
                    "p_value": p_val,
                    "effect_size": es_val,
                    "ci95": ci,
                    "formatted": _significance_stars(p_val),
                }
            )

    return {
        "comparisons": comparisons,
        "metadata": bundle.spec.to_dict() if bundle.spec else {},
    }
# Public API of this module.
__all__ = [
    "BUNDLE_AVAILABLE",
    "test_result_to_stats",
    "save_stats",
    "load_stats",
]

# EOF