Coverage for src/instawell/processing/step_05_minmax_scale.py: 84%

31 statements  

« prev     ^ index     » next       coverage.py v7.11.0, created at 2025-11-07 15:47 -0600

1import logging 

2 

3import pandas as pd 

4 

5from instawell.core.exp_context import ExperimentContext 

6from instawell.core.steps import StepFiles 

7from instawell.utils.logging_util import setup_experiment_logging 

8 

9# Module-level logger. No level is set here; min_max_scale passes ctx.log_level to setup_experiment_logging.

10logger = logging.getLogger(__name__) 

11 

12 

def min_max_scale(ctx: ExperimentContext) -> None:
    """Min-max scale the background-subtracted data and write the result to disk.

    Reads ``StepFiles.BG_SUB_DATA`` from ``ctx.experiment_dir``, rescales every
    column whose name does not start with ``"Temperature"`` with
    ``(x - min) / (max - min)``, and writes the frame (without the index) to
    ``StepFiles.MIN_MAX_SCALED_DATA`` in the same directory.

    Columns whose minimum equals their maximum are left unscaled to avoid a
    division by zero; they are still subject to the [0, 1] range check below.

    Args:
        ctx: Experiment context. This function reads ``experiment_dir``,
            ``log_to_file`` and ``log_level`` from it.

    Raises:
        FileNotFoundError: If the background-subtracted data file is missing.
        ValueError: If the first column is not ``"Temperature"``, if any
            non-temperature column ends up outside [0, 1], or if the
            ``"Temperature"`` column differs from what was loaded.
    """
    if ctx.log_to_file:
        setup_experiment_logging(
            experiment_dir=ctx.experiment_dir, filename="experiment.log", level=ctx.log_level
        )

    # Locate and load the background-subtracted data.
    bg_data_path = ctx.experiment_dir / StepFiles.BG_SUB_DATA
    if not bg_data_path.exists():
        raise FileNotFoundError(f"BG subtracted data file not found: {bg_data_path}")
    data = pd.read_csv(bg_data_path)

    # The upstream step is expected to write 'Temperature' first; fail loudly if not.
    if data.columns[0] != "Temperature":
        raise ValueError("The first column must be 'Temperature'.")

    # Snapshot the temperature axis so it can be verified after scaling
    # without parsing the CSV from disk a second time.
    original_temperature = data["Temperature"].copy()

    # Every column not named like a temperature axis is treated as a signal.
    signal_columns = [col for col in data.columns if not col.startswith("Temperature")]

    for col in signal_columns:
        col_min = data[col].min()
        col_range = data[col].max() - col_min
        if col_range == 0:
            # Constant column: scaling would divide by zero, so leave it as is.
            # NOTE(review): a constant value outside [0, 1] will then fail the
            # range check below - confirm that this is the intended outcome.
            continue
        data[col] = (data[col] - col_min) / col_range

    # Post-condition: every signal column lies within [0, 1].
    for col in signal_columns:
        if data[col].min() < 0 or data[col].max() > 1:
            raise ValueError(f"Column {col} not scaled between 0 and 1.")

    # Post-condition: the temperature axis was not modified by the scaling loop.
    if not data["Temperature"].equals(original_temperature):
        raise ValueError("Temperature column has been altered during scaling.")

    # Persist the scaled data next to the input file.
    scaled_data_path = ctx.experiment_dir / StepFiles.MIN_MAX_SCALED_DATA
    data.to_csv(scaled_data_path, index=False)
    # Use the module logger (not the root logger) with lazy %-formatting.
    logger.info("Min-max scaled data saved to %s", scaled_data_path)