Coverage for src/instawell/processing/step_00_setup_experiment.py: 97%

37 statements  

« prev     ^ index     » next       coverage.py v7.11.0, created at 2025-11-07 15:47 -0600

1from __future__ import annotations 

2 

3import json 

4import logging 

5from pathlib import Path 

6 

7import pandas as pd 

8 

9from instawell.core.exp_context import ExperimentContext 

10from instawell.utils.logging_util import ensure_experiment_context, setup_experiment_logging 

11 

12logger = logging.getLogger(__name__) 

13 

14 

def setup_experiment(
    experiment_name: str,
    raw_data_path: str,
    layout_data_path: str,
    *,
    fields: tuple[str, ...] = ("concentration", "ligand", "protein", "buffer"),
    well_col_identifier: str = "Well",
    condition_separator: str = "_",
    empty_condition_placeholder: str = "0",
    non_protein_control_marker: str = "NPC",
    experiments_root: str | Path = "experiments",
    temperature_column: str = "Temperature",
    log_to_file: bool = True,
    log_level: int = logging.INFO,
) -> ExperimentContext:
    """
    Step 00: Set up the experiment directory, logging, and copy input files.

    The function only accepts individual arguments (there is no ``ctx``
    parameter); everything after ``layout_data_path`` is keyword-only.

    Example:
        ctx = setup_experiment(
            experiment_name="exp_001",
            raw_data_path="raw.csv",
            layout_data_path="layout.csv",
        )

    Args:
        experiment_name: Name of the experiment; passed to
            ``ensure_experiment_context`` and stored on the returned context.
        raw_data_path: Path to the raw-data CSV to read and copy.
        layout_data_path: Path to the layout CSV to read and copy.
        fields: Forwarded unchanged to ``ExperimentContext``.
        well_col_identifier: Forwarded unchanged to ``ExperimentContext``.
        condition_separator: Forwarded unchanged to ``ExperimentContext``.
        empty_condition_placeholder: Forwarded unchanged to ``ExperimentContext``.
        non_protein_control_marker: Forwarded unchanged to ``ExperimentContext``.
        experiments_root: Root directory under which the experiment lives.
        temperature_column: Forwarded unchanged to ``ExperimentContext``.
        log_to_file: Passed to ``ensure_experiment_context``; when true,
            ``setup_experiment_logging`` is also called with ``experiment.log``.
        log_level: Logging level handed to the logging helpers.

    Returns:
        An ``ExperimentContext`` whose ``raw_data_path`` and
        ``layout_data_path`` point at the copies written into the experiment
        directory. The context is also serialised as JSON to
        ``ctx.metadata_path``.
    """
    # Provisional context built from the caller-supplied paths. Its
    # raw_data_source / layout_data_source values are carried over into the
    # final context below.
    tmp_ctx = ExperimentContext(
        experiment_name=experiment_name,
        raw_data_path=Path(raw_data_path),
        layout_data_path=Path(layout_data_path),
        experiments_root=Path(experiments_root),
        log_to_file=log_to_file,
        log_level=log_level,
        fields=fields,
        well_col_identifier=well_col_identifier,
        empty_condition_placeholder=empty_condition_placeholder,
        condition_separator=condition_separator,
        temperature_column=temperature_column,
        non_protein_control_marker=non_protein_control_marker,
    )

    # Ensure experiment directory + logging (this handles ./experiments/<name>)
    experiment_dir = ensure_experiment_context(
        experiment_name=tmp_ctx.experiment_name,
        experiments_root=tmp_ctx.experiments_root,
        log_to_file=tmp_ctx.log_to_file,
        log_level=tmp_ctx.log_level,
    )

    logger.info("Created new experiment directory at %s", experiment_dir)

    # Copy source files into experiment dir.
    # NOTE: the files are parsed and re-written through pandas rather than
    # byte-copied, so the copies may not be byte-identical to the inputs.
    raw_df = pd.read_csv(tmp_ctx.raw_data_path)
    layout_df = pd.read_csv(tmp_ctx.layout_data_path)

    raw_copy = experiment_dir / tmp_ctx.raw_data_path.name
    layout_copy = experiment_dir / tmp_ctx.layout_data_path.name

    raw_df.to_csv(raw_copy, index=False)
    layout_df.to_csv(layout_copy, index=False)

    logger.info("Raw data copied to %s", raw_copy)
    logger.info("Layout data copied to %s", layout_copy)

    # Final context: same settings, but the data paths now reference the
    # copies inside the experiment directory.
    ctx = ExperimentContext(
        experiment_name=experiment_name,
        experiments_root=tmp_ctx.experiments_root,
        raw_data_path=raw_copy,
        layout_data_path=layout_copy,
        raw_data_source=tmp_ctx.raw_data_source,
        layout_data_source=tmp_ctx.layout_data_source,
        log_to_file=log_to_file,
        log_level=log_level,
        temperature_column=temperature_column,
        fields=fields,
        well_col_identifier=well_col_identifier,
        empty_condition_placeholder=empty_condition_placeholder,
        condition_separator=condition_separator,
        non_protein_control_marker=non_protein_control_marker,
    )
    if ctx.log_to_file:
        setup_experiment_logging(
            experiment_dir=ctx.experiment_dir, filename="experiment.log", level=ctx.log_level
        )

    logger.info(
        "Experiment '%s' setup completed in %s",
        ctx.experiment_name,
        experiment_dir,
    )
    # Persist the context so a later step can reload it from disk.
    ctx.metadata_path.write_text(ctx.model_dump_json(indent=2), encoding="utf-8")

    return ctx

111 

112 

def load_experiment_context(
    experiment_name: str,
    experiments_root: str | Path = "experiments",
) -> ExperimentContext:
    """
    Reload an ExperimentContext for an existing experiment.

    Looks for ./experiments/<experiment_name>/experiment.json by default.

    Args:
        experiment_name: Name of the experiment sub-directory to load.
        experiments_root: Directory that contains the experiment directories.
            A relative path is resolved against the current working directory.

    Returns:
        The ``ExperimentContext`` validated from the stored JSON metadata.

    Raises:
        FileNotFoundError: If ``experiment.json`` cannot be found in the
            experiment directory.
    """
    experiments_root = Path(experiments_root)
    experiment_dir = (
        experiments_root if experiments_root.is_absolute() else Path.cwd() / experiments_root
    ) / experiment_name

    metadata_path = experiment_dir / "experiment.json"

    # Read directly instead of checking exists() first: a separate existence
    # check can race with the file being removed before the read happens.
    # NotADirectoryError covers a parent path component that is a regular file,
    # which the previous exists() check also reported as "not found".
    try:
        raw_metadata = metadata_path.read_text(encoding="utf-8")
    except (FileNotFoundError, NotADirectoryError) as err:
        raise FileNotFoundError(
            f"Could not find experiment metadata at: {metadata_path}\n"
            "This usually means step 00 (setup_experiment) has not been run "
            "or the experiment directory is incomplete."
        ) from err

    data = json.loads(raw_metadata)

    # Validation turns the plain JSON values back into the model's typed fields.
    ctx = ExperimentContext.model_validate(data)

    # Safety: if the stored experiments_root is a relative path that does not
    # exist from the current working directory, fall back to the caller's root.
    if not ctx.experiments_root.is_absolute() and not ctx.experiments_root.exists():
        ctx.experiments_root = experiments_root

    return ctx

145 

146 return ctx