Source code for scitex_ml.classification.reporters._SingleClassificationReporter

#!/usr/bin/env python3
# Timestamp: "2026-01-24 (ywatanabe)"
# File: /home/ywatanabe/proj/scitex-python/src/scitex/ai/classification/reporters/_SingleClassificationReporter.py

from __future__ import annotations
"""
Improved Single Classification Reporter with unified API.

This module provides a comprehensive classification reporter that:
- Uses unified API interface
- Supports lazy directory creation
- Provides numerical precision control
- Creates visualizations with proper error handling
- Maintains consistent parameter naming

The main class inherits from multiple mixins for modular functionality:
- MetricsMixin: Metrics calculation and aggregation
- StorageMixin: File storage and organization
- PlottingMixin: Visualization generation
- FeatureImportanceMixin: Feature importance analysis
- CVSummaryMixin: Cross-validation summary generation
- ReportsMixin: Multi-format report generation
"""


from pathlib import Path
from pprint import pprint
from typing import Any, Dict, List, Optional, Union

from scitex_logging import getLogger

from ._BaseClassificationReporter import BaseClassificationReporter, ReporterConfig
from ._mixins import (
    CVSummaryMixin,
    FeatureImportanceMixin,
    MetricsMixin,
    PlottingMixin,
    ReportsMixin,
    StorageMixin,
)
from .reporter_utils._Plotter import Plotter
from .reporter_utils.storage import MetricStorage

logger = getLogger(__name__)


class SingleTaskClassificationReporter(
    MetricsMixin,
    StorageMixin,
    PlottingMixin,
    FeatureImportanceMixin,
    CVSummaryMixin,
    ReportsMixin,
    BaseClassificationReporter,
):
    """
    Improved single-task classification reporter with unified API.

    Key improvements:
    - Inherits from BaseClassificationReporter for consistent API
    - Lazy directory creation (no empty folders)
    - Numerical precision control
    - Graceful plotting with proper error handling
    - Consistent parameter names across all methods

    Features:
    - Comprehensive metrics calculation (balanced accuracy, MCC, ROC-AUC,
      PR-AUC, etc.)
    - Automated visualization generation:
      * Confusion matrices
      * ROC and Precision-Recall curves
      * Feature importance plots
      * CV aggregation plots with faded fold lines
      * Comprehensive metrics dashboard
    - Multi-format report generation (Org, Markdown, LaTeX, HTML, DOCX, PDF)
    - Cross-validation support with automatic fold aggregation

    Parameters
    ----------
    output_dir : Union[str, Path]
        Base directory for outputs; forwarded unchanged to
        ``BaseClassificationReporter``. Earlier documentation states that
        ``None`` yields a timestamped directory -- that handling would live
        in the base class and is not visible here (TODO confirm).
    config : ReporterConfig, optional
        Configuration object for advanced settings. A default
        ``ReporterConfig()`` is created when omitted.
    verbose : bool, default True
        Log an initialization message.
    **kwargs
        Additional arguments passed to the base class.

    Examples
    --------
    >>> # Basic usage
    >>> reporter = SingleTaskClassificationReporter("./results")
    >>> metrics = reporter.calculate_metrics(y_true, y_pred, y_proba, labels=['A', 'B'])
    >>> reporter.save_summary()

    >>> # Cross-validation with automatic CV aggregation plots
    >>> for fold, (train_idx, test_idx) in enumerate(cv.split(X, y)):
    ...     metrics = reporter.calculate_metrics(
    ...         y_test, y_pred, y_proba, fold=fold
    ...     )
    >>> reporter.save_summary()  # Automatically creates CV aggregation visualizations

    >>> # Feature importance visualization
    >>> reporter.plotter.create_feature_importance_plot(
    ...     feature_importance=importances,
    ...     feature_names=feature_names,
    ...     save_path=output_dir / "feature_importance.png"
    ... )
    """

    def __init__(
        self,
        output_dir: Union[str, Path],
        config: Optional[ReporterConfig] = None,
        verbose: bool = True,
        **kwargs,
    ):
        """
        Initialize the reporter, its metric storage and its plotter.

        Parameters
        ----------
        output_dir : Union[str, Path]
            Base directory for outputs, forwarded to the base class.
        config : ReporterConfig, optional
            Reporter configuration; its ``precision`` is forwarded to the
            base class. Defaults to ``ReporterConfig()``.
        verbose : bool, default True
            Log an initialization message.
        **kwargs
            Additional arguments passed to the base class.
        """
        if config is None:
            config = ReporterConfig()

        super().__init__(
            output_dir=output_dir,
            precision=config.precision,
            **kwargs,
        )

        self.config = config
        # Populated later through set_session_config().
        self.session_config = None

        # self.output_dir and self.precision are read back from the base
        # class after super().__init__().
        self.storage = MetricStorage(self.output_dir, precision=self.precision)
        self.plotter = Plotter(enable_plotting=True)

        # Per-fold metrics keyed by fold index, plus collected predictions.
        self.fold_metrics: Dict[int, Dict[str, Any]] = {}
        self.all_predictions: List[Dict[str, Any]] = []

        if verbose:
            logger.info(
                f"{self.__class__.__name__} initialized with output directory: {self.output_dir}"
            )

    def set_session_config(self, config: Any) -> None:
        """
        Set the SciTeX session CONFIG object for inclusion in reports.

        Parameters
        ----------
        config : Any
            The SciTeX session CONFIG object
        """
        self.session_config = config

    def save_summary(
        self, filename: str = "cv_summary/summary.json", verbose: bool = True
    ) -> Path:
        """
        Save summary to file, create CV summary visualizations, and generate reports.

        Parameters
        ----------
        filename : str, default "cv_summary/summary.json"
            Storage-relative filename the summary is written to.
        verbose : bool, default True
            Print summary to console

        Returns
        -------
        Path
            Path to saved summary file
        """
        summary = self.get_summary()

        # Best-effort: attach the experiment configuration when a CONFIG.yaml
        # sits near the output directory. Any failure (missing PyYAML,
        # unreadable or malformed file) is logged and otherwise ignored so it
        # never prevents the summary from being written.
        try:
            candidate_paths = [
                self.output_dir.parent / "CONFIGS" / "CONFIG.yaml",
                self.output_dir.parent.parent / "CONFIGS" / "CONFIG.yaml",
                self.output_dir / "CONFIGS" / "CONFIG.yaml",
            ]
            # First existing candidate wins; None when there is no match.
            config_path = next(
                (path for path in candidate_paths if path.exists()), None
            )
            if config_path is not None:
                # Imported lazily so PyYAML stays an optional dependency.
                import yaml

                with open(config_path) as config_file:
                    config_data = yaml.safe_load(config_file)
                summary["experiment_configuration"] = config_data
        except Exception as e:
            logger.warning(f"Could not load CONFIG.yaml: {e}")

        # CV-level artifacts, then the multi-format reports.
        self._save_cv_summary_metrics(summary)
        self.save_cv_summary_confusion_matrix(summary)
        self.create_cv_summary_curves(summary)
        self.create_cv_aggregation_visualizations(
            show_individual_folds=True, fold_alpha=0.15
        )
        self._save_cv_summary_classification_report(summary)
        self.generate_reports()

        cv_summary_dir = self._create_subdir_if_needed("cv_summary")
        cv_summary_dir.mkdir(parents=True, exist_ok=True)

        if verbose:
            print()
            logger.info("Summary:")
            pprint(summary)

        # Honor the caller-supplied filename; previously this argument was
        # ignored and the default path was always used.
        return self.storage.save(summary, filename)

    def __repr__(self) -> str:
        """Return a short description with the fold count and output directory."""
        fold_count = len(self.fold_metrics)
        # type(self).__name__ keeps the repr accurate for subclasses, matching
        # the class name logged by __init__.
        return (
            f"{type(self).__name__}("
            f"folds={fold_count}, "
            f"output_dir='{self.output_dir}')"
        )
# EOF