Coverage for src/instawell/processing/step_00_setup_experiment.py: 97%
37 statements
« prev ^ index » next coverage.py v7.11.0, created at 2025-11-07 15:47 -0600
« prev ^ index » next coverage.py v7.11.0, created at 2025-11-07 15:47 -0600
1from __future__ import annotations
3import json
4import logging
5from pathlib import Path
7import pandas as pd
9from instawell.core.exp_context import ExperimentContext
10from instawell.utils.logging_util import ensure_experiment_context, setup_experiment_logging
12logger = logging.getLogger(__name__)
def setup_experiment(
    experiment_name: str,
    raw_data_path: str,
    layout_data_path: str,
    *,
    fields: tuple[str, ...] = ("concentration", "ligand", "protein", "buffer"),
    well_col_identifier: str = "Well",
    condition_separator: str = "_",
    empty_condition_placeholder: str = "0",
    non_protein_control_marker: str = "NPC",
    experiments_root: str | Path = "experiments",
    temperature_column: str = "Temperature",
    log_to_file: bool = True,
    log_level: int = logging.INFO,
) -> ExperimentContext:
    """
    Step 00: Prepare the experiment directory, logging, and copies of the inputs.

    The function is called with individual arguments only, e.g.:
        setup_experiment(
            raw_data_path="raw.csv",
            layout_data_path="layout.csv",
            experiment_name="exp_001",
        )

    What it does, in order:
      1. Builds a provisional ExperimentContext from the arguments.
      2. Obtains the experiment directory from ensure_experiment_context.
      3. Reads the raw and layout CSV files with pandas and writes them, under
         their original file names, into the experiment directory. The copies
         are re-serialized by pandas (``index=False``), not byte-for-byte copies.
      4. Builds the final ExperimentContext whose ``raw_data_path`` and
         ``layout_data_path`` point at those copies.
      5. If file logging is enabled on the final context, calls
         setup_experiment_logging with the file name ``experiment.log``.
      6. Writes the final context as indented JSON to ``ctx.metadata_path``.

    Args:
        experiment_name: Name of the experiment.
        raw_data_path: Location of the raw data CSV file.
        layout_data_path: Location of the layout CSV file.
        experiments_root: Root directory under which experiments are kept.
        log_to_file, log_level: Logging settings handed to the logging helpers.
        fields, well_col_identifier, condition_separator,
        empty_condition_placeholder, non_protein_control_marker,
        temperature_column: Stored unchanged on the ExperimentContext.

    Returns:
        The final ExperimentContext describing the prepared experiment.
    """
    # Settings handed over identically to both the provisional and the final context.
    shared_settings = {
        "experiment_name": experiment_name,
        "log_to_file": log_to_file,
        "log_level": log_level,
        "fields": fields,
        "well_col_identifier": well_col_identifier,
        "empty_condition_placeholder": empty_condition_placeholder,
        "condition_separator": condition_separator,
        "temperature_column": temperature_column,
        "non_protein_control_marker": non_protein_control_marker,
    }

    # Provisional context: still points at the caller-supplied input files.
    provisional = ExperimentContext(
        raw_data_path=Path(raw_data_path),
        layout_data_path=Path(layout_data_path),
        experiments_root=Path(experiments_root),
        **shared_settings,
    )

    # Ensure experiment directory + logging (this handles ./experiments/<name>)
    experiment_dir = ensure_experiment_context(
        experiment_name=provisional.experiment_name,
        experiments_root=provisional.experiments_root,
        log_to_file=provisional.log_to_file,
        log_level=provisional.log_level,
    )
    logger.info("Created new experiment directory at %s", experiment_dir)

    # Load both inputs before anything is written into the experiment directory.
    raw_frame = pd.read_csv(provisional.raw_data_path)
    layout_frame = pd.read_csv(provisional.layout_data_path)

    # The copies keep the original file names.
    raw_target = experiment_dir / provisional.raw_data_path.name
    layout_target = experiment_dir / provisional.layout_data_path.name

    raw_frame.to_csv(raw_target, index=False)
    layout_frame.to_csv(layout_target, index=False)

    logger.info("Raw data copied to %s", raw_target)
    logger.info("Layout data copied to %s", layout_target)

    # Final context: data paths now refer to the copies inside the experiment
    # directory; the *_source values are carried over from the provisional context
    # (presumably the original input locations; confirm in ExperimentContext).
    ctx = ExperimentContext(
        experiments_root=provisional.experiments_root,
        raw_data_path=raw_target,
        layout_data_path=layout_target,
        raw_data_source=provisional.raw_data_source,
        layout_data_source=provisional.layout_data_source,
        **shared_settings,
    )

    if ctx.log_to_file:
        setup_experiment_logging(
            experiment_dir=ctx.experiment_dir,
            filename="experiment.log",
            level=ctx.log_level,
        )

    logger.info(
        "Experiment '%s' setup completed in %s",
        ctx.experiment_name,
        experiment_dir,
    )

    # Persist the context so later steps can reload it from disk.
    serialized = ctx.model_dump_json(indent=2)
    ctx.metadata_path.write_text(serialized, encoding="utf-8")

    return ctx
def load_experiment_context(
    experiment_name: str,
    experiments_root: str | Path = "experiments",
) -> ExperimentContext:
    """
    Rebuild the ExperimentContext of an experiment that was already set up.

    The metadata is read from <experiments_root>/<experiment_name>/experiment.json.
    A relative ``experiments_root`` is resolved against the current working
    directory, so the default location is ./experiments/<experiment_name>/.

    Raises:
        FileNotFoundError: If the metadata file does not exist.
    """
    root = Path(experiments_root)
    base_dir = root if root.is_absolute() else Path.cwd() / root
    metadata_file = base_dir / experiment_name / "experiment.json"

    # Guard clause: without the metadata file there is nothing to reload.
    if not metadata_file.exists():
        raise FileNotFoundError(
            f"Could not find experiment metadata at: {metadata_file}\n"
            "This usually means step 00 (setup_experiment) has not been run "
            "or the experiment directory is incomplete."
        )

    payload = json.loads(metadata_file.read_text(encoding="utf-8"))

    # Validation turns the stored values back into the model's field types.
    ctx = ExperimentContext.model_validate(payload)

    # Safety net: a stored root that is relative and does not exist from here
    # is replaced by the root the caller asked for.
    stored_root = ctx.experiments_root
    if not (stored_root.is_absolute() or stored_root.exists()):
        ctx.experiments_root = root

    return ctx