#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Timestamp: "2025-10-02 21:15:33 (ywatanabe)"
# File: /ssh:sp:/home/ywatanabe/proj/scitex_repo/src/scitex/ml/classification/reporters/reporter_utils/storage.py
# ----------------------------------------
from __future__ import annotations
import os
__FILE__ = __file__
__DIR__ = os.path.dirname(__FILE__)
# ----------------------------------------
"""
Storage utilities for classification reporters.
Enhanced version of storage utilities with:
- Consistent use of scitex_io.save for all file operations
- Lazy directory creation
- Numerical precision control
- Better error handling
- Optimized file organization
"""
from pathlib import Path
from typing import Any, Dict, Union
import numpy as np
from scitex_io import save as stx_io_save
[docs]
class MetricStorage:
"""
Enhanced storage handler with lazy creation and precision control.
Features:
- Creates directories only when actually needed
- Rounds numerical values to specified precision
- Graceful error handling with informative messages
- Supports all standard data formats
"""
[docs]
def __init__(
self,
base_dir: Union[str, Path],
precision: int = 3,
verbose: bool = True,
):
"""
Initialize storage with base directory and precision.
Parameters
----------
base_dir : Union[str, Path]
Base directory for saving files
precision : int, default 3
Number of decimal places for numerical outputs
"""
self.base_dir = Path(base_dir)
self.precision = precision
self.verbose = verbose
def _round_numeric(self, data: Any) -> Any:
"""Round numeric values to specified precision."""
if isinstance(data, (int, float, np.integer, np.floating)):
return round(float(data), self.precision)
elif isinstance(data, dict):
return {k: self._round_numeric(v) for k, v in data.items()}
elif isinstance(data, (list, tuple)):
return type(data)(self._round_numeric(v) for v in data)
elif isinstance(data, np.ndarray):
if data.dtype.kind in "fc": # float or complex
return np.round(data, self.precision)
return data
else:
return data
[docs]
def save(
self, data: Any, relative_path: Union[str, Path], verbose=True, **kwargs
) -> Path:
"""
Save data with lazy directory creation and precision control.
Uses scitex_io.save for all file operations to ensure consistency.
Parameters
----------
data : Any
Data to save
relative_path : Union[str, Path]
Path relative to base_dir
verbose : bool, optional
Print save confirmation
**kwargs : dict
Additional keyword arguments passed to scitex_io.save (e.g., index=True for CSV)
Returns
-------
Path
Absolute path to saved file
"""
# Round numerical values for precision control
data = self._round_numeric(data)
# Construct full path and resolve to absolute
full_path = (self.base_dir / relative_path).resolve()
# Create directory only when actually needed
full_path.parent.mkdir(parents=True, exist_ok=True)
try:
# Use scitex_io.save for all file types (handles json, csv, figures, text, etc.)
# IMPORTANT: use_caller_path=False to avoid nested directory issues
# IMPORTANT: full_path must be absolute to prevent _out directory creation
stx_io_save(data, str(full_path), use_caller_path=False, **kwargs)
if verbose or self.verbose:
import scitex_logging as logging
logger = logging.getLogger(__name__)
logger.info(f"Saved to: {full_path}")
return full_path.absolute()
except Exception as e:
import scitex_logging as logging
logger = logging.getLogger(__name__)
logger.warning(f"Failed to save {relative_path}: {e}")
return full_path.absolute()
[docs]
def save_metric(
metric_value: Any,
path: Union[str, Path],
fold: int = None,
precision: int = 4,
) -> Path:
"""
Improved function to save individual metrics with precision control.
Parameters
----------
metric_value : Any
Metric value to save
path : Union[str, Path]
Output path
fold : int, optional
Fold index to include in metadata
precision : int, default 4
Number of decimal places
Returns
-------
Path
Path to saved file
"""
# Resolve to absolute path to prevent _out directory creation
path = Path(path).resolve()
path.parent.mkdir(parents=True, exist_ok=True)
# Round numerical values recursively
def round_value(val, prec):
if isinstance(val, (int, float, np.integer, np.floating)):
return round(float(val), prec)
elif isinstance(val, dict):
return {k: round_value(v, prec) for k, v in val.items()}
elif isinstance(val, (list, tuple)):
return type(val)(round_value(v, prec) for v in val)
else:
return val
metric_value = round_value(metric_value, precision)
# Prepare data structure
if isinstance(metric_value, dict):
data = metric_value
else:
metric_name = path.stem # Use filename as metric name
data = {"metric": metric_name, "value": metric_value}
# Add fold information if provided
if fold is not None:
data["fold"] = fold
# IMPORTANT: use_caller_path=False and absolute path to avoid nested directory issues
stx_io_save(data, str(path), use_caller_path=False)
return path
[docs]
def create_directory_structure_lazy(
base_dir: Union[str, Path],
) -> Dict[str, Path]:
"""
Create directory structure mapping without actually creating directories.
This returns paths that can be created later when actually needed.
Parameters
----------
base_dir : Union[str, Path]
Base directory
Returns
-------
Dict[str, Path]
Mapping of directory types to paths
"""
base_path = Path(base_dir)
structure = {
"base": base_path,
"metrics": base_path / "metrics",
"plots": base_path / "plots",
"tables": base_path / "tables",
"reports": base_path / "reports",
"models": base_path / "models",
"paper_export": base_path / "paper_export",
}
return structure
# EOF