Coverage for src/instawell/processing/step_05_minmax_scale.py: 84%
31 statements
« prev ^ index » next coverage.py v7.11.0, created at 2025-11-07 15:47 -0600
« prev ^ index » next coverage.py v7.11.0, created at 2025-11-07 15:47 -0600
1import logging
3import pandas as pd
5from instawell.core.exp_context import ExperimentContext
6from instawell.core.steps import StepFiles
7from instawell.utils.logging_util import setup_experiment_logging
# Module-level logger for this step. No level is set here; a level is only
# passed to setup_experiment_logging() inside min_max_scale().
logger = logging.getLogger(__name__)
def min_max_scale(ctx: ExperimentContext) -> None:
    """Min-max scale the background-subtracted data and save the result.

    Reads ``StepFiles.BG_SUB_DATA`` from ``ctx.experiment_dir``, rescales every
    column whose name does not start with ``"Temperature"`` with
    ``(x - min) / (max - min)``, and writes the result (without the index) to
    ``StepFiles.MIN_MAX_SCALED_DATA`` in the same directory.

    Columns with zero range (``max == min``) are left unscaled to avoid a
    division by zero. Such a column still has to pass the [0, 1] range check
    below, so a constant column whose value lies outside [0, 1] raises.

    Args:
        ctx: Experiment context. This function reads ``experiment_dir``,
            ``log_to_file`` and ``log_level`` from it.

    Raises:
        FileNotFoundError: If the background-subtracted data file is missing.
        ValueError: If the first column is not ``"Temperature"``, if any
            non-Temperature column falls outside [0, 1] after scaling, or if
            the ``"Temperature"`` column differs from the values that were
            loaded.
    """
    if ctx.log_to_file:
        setup_experiment_logging(
            experiment_dir=ctx.experiment_dir, filename="experiment.log", level=ctx.log_level
        )

    # Build the path to the background subtracted data
    bg_data_path = ctx.experiment_dir / StepFiles.BG_SUB_DATA
    if not bg_data_path.exists():
        raise FileNotFoundError(f"BG subtracted data file not found: {bg_data_path}")

    # Load the background subtracted data
    data = pd.read_csv(bg_data_path)

    # The upstream step is expected to write 'Temperature' as the first column.
    if data.columns[0] != "Temperature":
        raise ValueError("The first column must be 'Temperature'.")

    # Snapshot the Temperature values now so the integrity check at the end
    # does not need to re-read the CSV from disk.
    original_temperature = data["Temperature"].copy()

    # Every column whose name starts with "Temperature" is skipped, not just
    # the first one. NOTE(review): presumably this also covers additional
    # temperature-like columns -- confirm against the upstream step.
    value_columns = [col for col in data.columns if not col.startswith("Temperature")]

    for col in value_columns:
        col_min = data[col].min()
        col_range = data[col].max() - col_min
        if col_range == 0:
            continue  # Avoid division by zero; constant columns stay as-is
        data[col] = (data[col] - col_min) / col_range

    # Ensure columns except Temperature are scaled between 0 and 1
    for col in value_columns:
        if data[col].min() < 0 or data[col].max() > 1:
            raise ValueError(f"Column {col} not scaled between 0 and 1.")

    # Ensure Temperature column is unchanged
    if not data["Temperature"].equals(original_temperature):
        raise ValueError("Temperature column has been altered during scaling.")

    # Save the min-max scaled data
    scaled_data_path = ctx.experiment_dir / StepFiles.MIN_MAX_SCALED_DATA
    data.to_csv(scaled_data_path, index=False)
    # Use the module-level logger (not the root logger) so per-module
    # logging configuration applies to this message.
    logger.info("Min-max scaled data saved to %s", scaled_data_path)