#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Timestamp: "2025-10-02 06:38:58 (ywatanabe)"
# File: /ssh:sp:/home/ywatanabe/proj/scitex_repo/src/scitex/ml/classification/reporters/_ClassificationReporter.py
# ----------------------------------------
from __future__ import annotations
import scitex_io
import os
__FILE__ = __file__
__DIR__ = os.path.dirname(__FILE__)
# ----------------------------------------
"""
Unified Classification Reporter.
A single, unified reporter that handles both single-task and multi-task
classification scenarios seamlessly.
"""
from pathlib import Path
from typing import Any, Dict, List, Optional, Union
import numpy as np
# Import base class and single reporter for internal use
from ._BaseClassificationReporter import BaseClassificationReporter, ReporterConfig
from ._SingleClassificationReporter import SingleTaskClassificationReporter
from .reporter_utils.storage import MetricStorage
class ClassificationReporter(BaseClassificationReporter):
    """
    Unified classification reporter for single and multi-task scenarios.

    This reporter automatically adapts to your use case:

    - Single task: Just use it without specifying tasks
    - Multiple tasks: Specify tasks upfront or create them dynamically
    - Seamless switching between single and multi-task workflows

    Features:

    - Comprehensive metrics calculation (balanced accuracy, MCC, ROC-AUC,
      PR-AUC, etc.)
    - Automated visualization generation:

      * Confusion matrices
      * ROC and Precision-Recall curves
      * Feature importance plots (via plotter)
      * CV aggregation plots with faded fold lines
      * Comprehensive metrics dashboard

    - Multi-format report generation (Org, Markdown, LaTeX, HTML, DOCX, PDF)
    - Cross-validation support with automatic fold aggregation
    - Multi-task classification tracking

    Parameters
    ----------
    output_dir : Union[str, Path]
        Base directory for outputs. If None, creates timestamped directory.
    tasks : List[str], optional
        List of task names. If None, tasks are created dynamically as needed.
        The list is copied, so the caller's list is never modified.
    precision : int, default 3
        Number of decimal places for numerical outputs
    required_metrics : List[str], optional
        List of metrics to calculate. If None, defaults to the comprehensive
        set: balanced_accuracy, mcc, confusion_matrix, classification_report,
        roc_auc, roc_curve, pre_rec_auc, pre_rec_curve.
    verbose : bool, default True
        Whether to print initialization messages
    **kwargs
        Additional arguments passed to base class

    Notes
    -----
    The reporter is in *single-task mode* while no task has been registered:
    one ``SingleTaskClassificationReporter`` writes directly into
    ``output_dir``. As soon as tasks exist (given upfront, or because a
    ``task`` was passed to ``calculate_metrics`` / ``save``) it is in
    *multi-task mode*: each task gets its own reporter under
    ``output_dir / task``. Switching from single-task to multi-task mode
    discards the root-level single reporter.

    Examples
    --------
    >>> # Single task usage (no tasks specified)
    >>> reporter = ClassificationReporter("./results")
    >>> reporter.calculate_metrics(y_true, y_pred, y_proba)

    >>> # Multi-task with predefined tasks
    >>> reporter = ClassificationReporter("./results", tasks=["binary", "multiclass"])
    >>> reporter.calculate_metrics(y_true, y_pred, task="binary")

    >>> # Dynamic task creation
    >>> reporter = ClassificationReporter("./results")
    >>> reporter.calculate_metrics(y_true1, y_pred1, task="task1")
    >>> reporter.calculate_metrics(y_true2, y_pred2, task="task2")

    >>> # Feature importance visualization (via plotter)
    >>> reporter._single_reporter.plotter.create_feature_importance_plot(
    ...     feature_importance=importances,
    ...     feature_names=feature_names,
    ...     save_path="./results/feature_importance.png"
    ... )

    >>> # CV aggregation plots (automatically created on save_summary)
    >>> for fold in range(5):
    ...     metrics = reporter.calculate_metrics(y_true, y_pred, y_proba, fold=fold)
    >>> reporter.save_summary()  # Creates CV aggregation plots with faded fold lines
    """

    def __init__(
        self,
        output_dir: Union[str, Path],
        tasks: Optional[List[str]] = None,
        precision: int = 3,
        required_metrics: Optional[List[str]] = None,
        verbose: bool = True,
        **kwargs,
    ):
        # The default is built per call: a list literal in the signature would
        # be one object shared (and mutable) across every instance.
        if required_metrics is None:
            required_metrics = [
                "balanced_accuracy",
                "mcc",
                "confusion_matrix",
                "classification_report",
                "roc_auc",
                "roc_curve",
                "pre_rec_auc",
                "pre_rec_curve",
            ]

        # Create internal config from parameters
        self.config = ReporterConfig(
            precision=precision, required_metrics=required_metrics
        )

        # Initialize base class
        super().__init__(output_dir=output_dir, precision=precision, **kwargs)

        self.precision = precision
        self.required_metrics = required_metrics
        self.storage = MetricStorage(self.output_dir, precision=self.precision)

        # Copy the task list: tasks are appended dynamically later on, which
        # must not leak into the list object owned by the caller.
        self.tasks: List[str] = list(tasks) if tasks is not None else []
        self.verbose = verbose

        # One reporter per task (multi-task mode only)
        self.reporters: Dict[str, SingleTaskClassificationReporter] = {}

        # Single mode: one reporter at the root level
        # Multi mode: one reporter per task, each in its own subdirectory
        self._single_reporter: Optional[SingleTaskClassificationReporter]
        if not self.tasks:
            self._single_reporter = SingleTaskClassificationReporter(
                output_dir=self.output_dir, config=self.config, verbose=False
            )
        else:
            self._single_reporter = None
            self._setup_task_reporters()

        # Save configuration
        self._save_config()

        # Print initialization info if verbose (multi-task mode only)
        if self.verbose and self.tasks:
            print(f"\n{'=' * 70}")
            print("Classification Reporter Initialized")
            print(f"{'=' * 70}")
            print(f"Output Directory: {self.output_dir.absolute()}")
            print(f"Tasks: {self.tasks}")
            print(f"{'=' * 70}\n")

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------
    def _in_single_task_mode(self) -> bool:
        """Return True while no task is registered and the root reporter exists."""
        return not self.tasks and bool(self._single_reporter)

    def _create_single_reporter(self, task: str) -> None:
        """Create a single task reporter."""
        task_output_dir = self.output_dir / task
        self.reporters[task] = SingleTaskClassificationReporter(
            output_dir=task_output_dir,
            config=self.config,
            verbose=False,  # Suppress individual reporter messages
        )

    def _setup_task_reporters(self) -> None:
        """Setup individual reporters for each task."""
        for task in self.tasks:
            self._create_single_reporter(task)

    def _get_or_create_task_reporter(
        self, task: str
    ) -> SingleTaskClassificationReporter:
        """
        Return the reporter for ``task``, registering the task if necessary.

        If the reporter is still in single-task mode it is converted to
        multi-task mode on the fly: the root-level single reporter is dropped
        and ``task`` becomes the only registered task.
        """
        if self._in_single_task_mode():
            # Convert to multi-task mode on the fly
            self.tasks = [task]
            self._single_reporter = None
            self._create_single_reporter(task)
        elif task not in self.reporters:
            # Task explicitly specified - create if doesn't exist
            if task not in self.tasks:
                self.tasks.append(task)
            self._create_single_reporter(task)
        return self.reporters[task]

    def _save_config(self) -> None:
        """Save configuration to file."""
        config_data = {
            "output_dir": str(self.output_dir),
            "tasks": self.tasks,
            "precision": self.precision,
            "required_metrics": self.required_metrics,
        }
        self.storage.save(config_data, "config.json")

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def calculate_metrics(
        self,
        y_true: np.ndarray,
        y_pred: np.ndarray,
        y_proba: Optional[np.ndarray] = None,
        labels: Optional[List[str]] = None,
        fold: Optional[int] = None,
        task: Optional[str] = None,
        verbose: bool = True,
        model=None,
        feature_names: Optional[List[str]] = None,
    ) -> Dict[str, Any]:
        """
        Calculate metrics for classification.

        Automatically handles single vs multi-task scenarios:

        - If no task specified and no tasks defined: uses the root-level
          single-task reporter
        - If no task specified but tasks exist: uses first task
        - If task specified: uses/creates that specific task (switching to
          multi-task mode if the reporter was in single-task mode)

        Parameters
        ----------
        y_true : np.ndarray
            True class labels
        y_pred : np.ndarray
            Predicted class labels
        y_proba : np.ndarray, optional
            Prediction probabilities (required for AUC metrics)
        labels : List[str], optional
            Class labels for display
        fold : int, optional
            Fold index for cross-validation
        task : str, optional
            Task identifier. If None, the root-level reporter (single-task
            mode) or the first registered task (multi-task mode) is used.
        verbose : bool, default True
            Whether to print progress
        model : object, optional
            Trained model for automatic feature importance extraction
        feature_names : List[str], optional
            Feature names for feature importance (required if model is provided)

        Returns
        -------
        Dict[str, Any]
            Dictionary of calculated metrics
        """
        if task is not None:
            reporter = self._get_or_create_task_reporter(task)
        elif self._in_single_task_mode():
            # Stay in single-task mode
            reporter = self._single_reporter
        else:
            # Multi-task mode without explicit task: use first available task
            reporter = self.reporters[self.tasks[0]]

        return reporter.calculate_metrics(
            y_true=y_true,
            y_pred=y_pred,
            y_proba=y_proba,
            labels=labels,
            fold=fold,
            verbose=verbose,
            model=model,
            feature_names=feature_names,
        )

    def save(
        self,
        data: Any,
        relative_path: Union[str, Path],
        task: Optional[str] = None,
        fold: Optional[int] = None,
    ) -> Path:
        """
        Save custom data with automatic task/fold organization.

        Parameters
        ----------
        data : Any
            Data to save (any format supported by scitex_io.save)
        relative_path : Union[str, Path]
            Relative path from output directory
        task : Optional[str], default None
            Task name. If provided, saves to task-specific directory (the
            task is created if it does not exist yet). If None in multi-task
            mode, the data is saved relative to the base output directory.
        fold : Optional[int], default None
            If provided, automatically prepends "fold_{fold:02d}/" to path

        Returns
        -------
        Path
            Absolute path to the saved file

        Examples
        --------
        >>> # Single task mode (no task specified)
        >>> reporter.save({"accuracy": 0.95}, "metrics.json")

        >>> # Multi-task mode
        >>> reporter.save(results, "results.csv", task="binary", fold=0)
        """
        if task is not None:
            # Delegate to the task-specific reporter
            return self._get_or_create_task_reporter(task).save(
                data, relative_path, fold=fold
            )

        if self._in_single_task_mode():
            # Use single reporter's save
            return self._single_reporter.save(data, relative_path, fold=fold)

        # Multi-task mode without a task: save to base output directory
        if fold is not None:
            relative_path = f"fold_{fold:02d}/{relative_path}"
        return self.storage.save(data, relative_path)

    def get_summary(self) -> Dict[str, Any]:
        """
        Get summary of all calculated metrics.

        Returns
        -------
        Dict[str, Any]
            Summary of metrics across all tasks and folds. With exactly one
            reporter its summary is returned as-is; with several tasks the
            result has the keys ``"n_tasks"`` and ``"tasks"`` (task name ->
            task summary).
        """
        # Single-task mode
        if self._in_single_task_mode():
            return self._single_reporter.get_summary()

        # Multi-task mode
        if not self.reporters:
            return {"message": "No metrics calculated yet"}

        if len(self.reporters) == 1:
            # Only one task but in multi-task mode
            task_name = next(iter(self.reporters))
            return self.reporters[task_name].get_summary()

        # Multiple tasks - aggregate summaries
        return {
            "n_tasks": len(self.reporters),
            "tasks": {
                task_name: reporter.get_summary()
                for task_name, reporter in self.reporters.items()
            },
        }

    def save_summary(
        self, filename: str = "summary.json", verbose: bool = True
    ) -> Path:
        """
        Save summary to file.

        Parameters
        ----------
        filename : str
            Filename for summary
        verbose : bool
            Whether to print summary. Only forwarded when the call is
            delegated to a single underlying reporter; the aggregated
            multi-task summary is saved without printing.

        Returns
        -------
        Path
            Path to saved summary file
        """
        # Single-task mode - delegate to single reporter
        if self._in_single_task_mode():
            return self._single_reporter.save_summary(filename, verbose=verbose)

        if len(self.reporters) == 1:
            # Only one task but in multi-task mode
            task_name = next(iter(self.reporters))
            return self.reporters[task_name].save_summary(filename, verbose=verbose)

        # Multiple tasks - save aggregated summary in root directory
        return self.storage.save(self.get_summary(), filename)

    def save_feature_importance(
        self,
        model,
        feature_names: List[str],
        fold: Optional[int] = None,
        task: Optional[str] = None,
    ) -> Dict[str, float]:
        """
        Calculate and save feature importance for tree-based models.

        Parameters
        ----------
        model : object
            Fitted classifier (must have feature_importances_)
        feature_names : List[str]
            Names of features
        fold : int, optional
            Fold number for tracking
        task : str, optional
            Task name for multi-task mode

        Returns
        -------
        Dict[str, float]
            Dictionary of feature importances {feature_name: importance}.
            In multi-task mode an empty dict is returned when ``task`` is
            None or not a registered task.
        """
        # Single-task mode
        if self._in_single_task_mode():
            return self._single_reporter.save_feature_importance(
                model, feature_names, fold
            )

        # Multi-task mode
        if task is not None and task in self.reporters:
            return self.reporters[task].save_feature_importance(
                model, feature_names, fold
            )

        return {}

    def save_feature_importance_summary(
        self,
        all_importances: List[Dict[str, float]],
        task: Optional[str] = None,
    ) -> None:
        """
        Create summary visualization of feature importances across all folds.

        In multi-task mode nothing happens when ``task`` is None or not a
        registered task.

        Parameters
        ----------
        all_importances : List[Dict[str, float]]
            List of feature importance dicts from each fold
        task : str, optional
            Task name for multi-task mode
        """
        # Single-task mode
        if self._in_single_task_mode():
            return self._single_reporter.save_feature_importance_summary(
                all_importances
            )

        # Multi-task mode
        if task is not None and task in self.reporters:
            return self.reporters[task].save_feature_importance_summary(all_importances)

    def __repr__(self) -> str:
        if not self.tasks:
            return f"ClassificationReporter(output_dir='{self.output_dir}', tasks=None)"
        elif len(self.tasks) == 1:
            return f"ClassificationReporter(output_dir='{self.output_dir}', task='{self.tasks[0]}')"
        else:
            return f"ClassificationReporter(output_dir='{self.output_dir}', tasks={len(self.tasks)})"
# Convenience function for backwards compatibility
def create_classification_reporter(
    output_dir: Union[str, Path], tasks: Optional[List[str]] = None, **kwargs
) -> ClassificationReporter:
    """
    Build a ``ClassificationReporter`` (kept for backwards compatibility).

    Parameters
    ----------
    output_dir : Union[str, Path]
        Directory the reporter writes its results to
    tasks : List[str], optional
        Task names for multi-task usage; omit for single-task usage
    **kwargs
        Further options forwarded unchanged to ``ClassificationReporter``

    Returns
    -------
    ClassificationReporter
        The newly constructed reporter
    """
    reporter = ClassificationReporter(output_dir, tasks=tasks, **kwargs)
    return reporter
def parse_args(argv: Optional[List[str]] = None):
    """
    Parse command line arguments.

    Parameters
    ----------
    argv : List[str], optional
        Argument strings to parse. If None (the default), argparse reads
        them from ``sys.argv[1:]``, which is the behavior of a plain
        ``parse_args()`` call.

    Returns
    -------
    argparse.Namespace
        Namespace with the attributes ``output_dir``, ``n_samples``,
        ``n_folds`` and ``task_type``.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description="Test ClassificationReporter with sample data"
    )
    parser.add_argument(
        "--output-dir",
        type=str,
        default="./.dev/classification_reporter_test_out",
        help="Output directory for test results (default: %(default)s)",
    )
    parser.add_argument(
        "--n-samples",
        type=int,
        default=100,
        help="Number of samples to generate (default: %(default)s)",
    )
    parser.add_argument(
        "--n-folds",
        type=int,
        default=3,
        help="Number of CV folds (default: %(default)s)",
    )
    parser.add_argument(
        "--task-type",
        type=str,
        choices=["binary", "multiclass", "multitask"],
        default="binary",
        help="Type of classification task (default: %(default)s)",
    )
    return parser.parse_args(argv)
def _run_cv_folds(reporter, model, X, y, labels, n_folds, task=None):
    """Fit ``model`` on each stratified CV fold and report its test-split metrics."""
    from sklearn.model_selection import StratifiedKFold

    # Fixed seed so every run (and every task) gets reproducible splits.
    cv = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=42)
    for fold, (train_idx, test_idx) in enumerate(cv.split(X, y)):
        X_train, X_test = X[train_idx], X[test_idx]
        y_train, y_test = y[train_idx], y[test_idx]
        model.fit(X_train, y_train)
        reporter.calculate_metrics(
            y_true=y_test,
            y_pred=model.predict(X_test),
            y_proba=model.predict_proba(X_test),
            labels=labels,
            fold=fold,
            task=task,
        )


def main(args):
    """
    Test ClassificationReporter functionality.

    Generates a synthetic dataset for the scenario selected by
    ``args.task_type`` ("binary", "multiclass" or "multitask"), evaluates a
    classifier with stratified cross-validation through a
    ``ClassificationReporter``, saves the summary, and lists the files
    found under the output directory.

    Parameters
    ----------
    args : argparse.Namespace
        Must provide ``output_dir``, ``n_samples``, ``n_folds`` and
        ``task_type`` (see ``parse_args``).

    Returns
    -------
    int
        Always 0; used as the exit status.
    """
    import shutil
    import subprocess

    from sklearn.datasets import make_classification
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.linear_model import LogisticRegression

    # Create output directory
    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    print("=" * 60)
    print("ClassificationReporter Test")
    print("=" * 60)
    print(f"Task type: {args.task_type}")
    print(f"Output dir: {output_dir}")
    print(f"Samples: {args.n_samples}, Folds: {args.n_folds}")
    print()

    if args.task_type == "binary":
        # Binary classification
        print("Testing Binary Classification...")
        X, y = make_classification(
            n_samples=args.n_samples,
            n_features=20,
            n_classes=2,
            n_informative=15,
            n_redundant=5,
            random_state=42,
        )
        reporter = ClassificationReporter(output_dir / "binary", track=True)
        _run_cv_folds(
            reporter,
            LogisticRegression(random_state=42, max_iter=1000),
            X,
            y,
            labels=["Negative", "Positive"],
            n_folds=args.n_folds,
        )
        # Generate reports
        reporter.save_summary()
        print(f"✓ Binary classification results saved to: {output_dir / 'binary'}")

    elif args.task_type == "multiclass":
        # Multiclass classification
        print("Testing Multiclass Classification...")
        X, y = make_classification(
            n_samples=args.n_samples,
            n_features=20,
            n_classes=4,
            n_informative=15,
            n_redundant=5,
            n_clusters_per_class=1,
            random_state=42,
        )
        reporter = ClassificationReporter(output_dir / "multiclass", track=True)
        _run_cv_folds(
            reporter,
            RandomForestClassifier(n_estimators=50, random_state=42),
            X,
            y,
            labels=["Class_A", "Class_B", "Class_C", "Class_D"],
            n_folds=args.n_folds,
        )
        reporter.save_summary()
        print(
            f"✓ Multiclass classification results saved to: {output_dir / 'multiclass'}"
        )

    elif args.task_type == "multitask":
        # Multi-task classification
        print("Testing Multi-task Classification...")
        # Task 1: Binary
        X1, y1 = make_classification(
            n_samples=args.n_samples, n_features=20, n_classes=2, random_state=42
        )
        # Task 2: Multiclass
        X2, y2 = make_classification(
            n_samples=args.n_samples, n_features=20, n_classes=3, random_state=43
        )
        reporter = ClassificationReporter(
            output_dir / "multitask",
            tasks=["binary_task", "multiclass_task"],
            track=True,
        )
        # Task 1
        _run_cv_folds(
            reporter,
            LogisticRegression(random_state=42, max_iter=1000),
            X1,
            y1,
            labels=["Neg", "Pos"],
            n_folds=args.n_folds,
            task="binary_task",
        )
        # Task 2
        _run_cv_folds(
            reporter,
            RandomForestClassifier(n_estimators=50, random_state=42),
            X2,
            y2,
            labels=["A", "B", "C"],
            n_folds=args.n_folds,
            task="multiclass_task",
        )
        reporter.save_summary()
        print(
            f"✓ Multi-task classification results saved to: {output_dir / 'multitask'}"
        )

    print()
    print("=" * 60)
    print("Test Complete!")
    print("=" * 60)
    print(f"\nCreated files in: {output_dir}")

    # List all created files. pathlib is used instead of shelling out to
    # `find`, so this also works where that binary is unavailable.
    files = sorted(path for path in output_dir.rglob("*") if path.is_file())
    if files:
        print(f"\nTotal files created: {len(files)}")
        print("\nFile tree:")
        if shutil.which("tree"):
            subprocess.run(["tree", str(output_dir)])
        else:
            # `tree` is optional; fall back to a flat listing rather than
            # crashing with FileNotFoundError.
            for path in files:
                print(f"  {path.relative_to(output_dir)}")

    return 0
def run_main():
    """
    Initialize scitex framework, run main function, and cleanup.

    Parses the command line, starts a scitex session, runs ``main`` and
    closes the session with the exit status ``main`` returned.
    """
    # The names assigned below (and the imported `sys`/`plt`) are declared
    # global, so they are bound at module level rather than locally.
    global CONFIG, CC, sys, plt, rng
    import sys
    import matplotlib.pyplot as plt
    import scitex as stx

    args = parse_args()
    # The session returns replacements for stdout/stderr and plt along with
    # CONFIG, CC and rng.
    # NOTE(review): exact semantics of these objects are defined by
    # stx.session.start and are not visible here - confirm against scitex.
    CONFIG, sys.stdout, sys.stderr, plt, CC, rng = stx.session.start(
        sys,
        plt,
        args=args,
        file=__FILE__,
        sdir_suffix=None,
        verbose=False,
        agg=True,
    )
    exit_status = main(args)
    # NOTE(review): if main() raises, this close call is not reached.
    stx.session.close(
        CONFIG,
        verbose=False,
        notify=False,
        message="",
        exit_status=exit_status,
    )


if __name__ == "__main__":
    run_main()
# EOF