Coverage for src/instawell/processing/step_02_filter_wells.py: 92%
37 statements
« prev ^ index » next coverage.py v7.11.0, created at 2025-11-07 15:47 -0600
« prev ^ index » next coverage.py v7.11.0, created at 2025-11-07 15:47 -0600
1import logging
3import pandas as pd
5from instawell.core.exceptions import PrerequisiteStepError
6from instawell.core.exp_context import ExperimentContext
7from instawell.core.steps import StepFiles
8from instawell.utils.logging_util import setup_experiment_logging
10# module-level logger for this step (this line does not set a level)
11logger = logging.getLogger(__name__)
def filter_wells(
    ctx: ExperimentContext,
    *,
    wells_to_filter: list[str] | None = None,
) -> None:
    """Remove the given wells from the ingested data and record the result.

    Reads the CSV at ``ctx.experiment_dir / StepFiles.INGESTED_DATA``, drops
    every row whose ``well`` value is listed in ``wells_to_filter``, and writes
    two files into the experiment directory:

    * ``StepFiles.FILTERED_DATA`` -- the remaining rows as CSV (no index).
    * ``StepFiles.FILTERED_WELLS`` -- the removed well names, one per line, or
      the text ``No wells filtered.`` when nothing was removed.

    This step MUST still be run even if no wells are to be filtered, in order
    to record that no wells were filtered.

    Args:
        ctx: Experiment context; ``experiment_dir``, ``experiment_name``,
            ``log_to_file`` and ``log_level`` are read from it.
        wells_to_filter: Well names to drop. ``None`` or an empty list copies
            the data through unchanged.

    Raises:
        PrerequisiteStepError: If the ingested data file does not exist.
        ValueError: If a required column is missing from the data, or a
            requested well does not occur in the ``well`` column.
    """
    if ctx.log_to_file:
        setup_experiment_logging(
            experiment_dir=ctx.experiment_dir, filename="experiment.log", level=ctx.log_level
        )

    organized_data_path = ctx.experiment_dir / StepFiles.INGESTED_DATA
    try:
        organized_data = pd.read_csv(organized_data_path)
    except FileNotFoundError as exc:
        raise PrerequisiteStepError(
            f"Missing prerequisite data file: {organized_data_path}\n"
            "This usually means step 1 has not been run yet for this experiment.\n"
            "Please run `step_01_organize_data` (or the equivalent first step) "
            f"for experiment '{ctx.experiment_name}' and try again."
        ) from exc

    filtered_data_path = ctx.experiment_dir / StepFiles.FILTERED_DATA
    filtered_wells_path = ctx.experiment_dir / StepFiles.FILTERED_WELLS

    required_columns = (
        "Temperature",
        "well",
        "value",
        "ligand",
        "protein",
        "buffer",
        "concentration",
        "well_unqcond",
    )
    for col in required_columns:
        if col not in organized_data.columns:
            raise ValueError(f"Missing required column: {col} in the data.")

    if not wells_to_filter:
        logger.info("No wells to filter. Returning the original data.")
        filtered_data = organized_data
        wells_record = "No wells filtered."
    else:
        # Validate every requested well before touching the data, so a typo
        # never produces a partially filtered output. The set is built once
        # instead of recomputing unique() for each requested well.
        known_wells = set(organized_data["well"].unique())
        for well in wells_to_filter:
            if well not in known_wells:
                raise ValueError(
                    f"Well {well} not found in organized data. Please check the well names."
                )

        # One boolean mask drops all requested wells in a single pass.
        filtered_data = organized_data[~organized_data["well"].isin(wells_to_filter)]
        for well in wells_to_filter:
            logger.info("Filtered out well: %s", well)
        wells_record = "\n".join(wells_to_filter)

    # Save the (possibly unchanged) data to a csv file in the experiment directory.
    filtered_data.to_csv(filtered_data_path, index=False)
    logger.info("Filtered data saved to %s", filtered_data_path)

    # Save a .txt file with the filtered wells; explicit encoding keeps the
    # output independent of the platform's default locale.
    with open(filtered_wells_path, "w", encoding="utf-8") as f:
        f.write(wells_record)