Coverage for src/instawell/utils/utils.py: 68%
28 statements
« prev ^ index » next coverage.py v7.11.0, created at 2025-11-07 15:47 -0600
« prev ^ index » next coverage.py v7.11.0, created at 2025-11-07 15:47 -0600
1import logging
3import pandas as pd
5from instawell.core.parser import condition_from_string
7# set logging level to INFO
8logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
def convert_concentration_to_float(concentration: str) -> float:
    """Convert a concentration string to a number expressed in micromolar.

    Recognised unit markers are ``uM`` (also spelled with the micro sign,
    U+00B5, or the Greek letter mu, U+03BC), ``mM`` and ``nM``. The marker
    is removed, surrounding whitespace is stripped, and the remaining text
    is parsed with ``float``.

    Args:
        concentration: Text such as ``"500uM"``, ``"2 mM"``, ``"250nM"`` or
            a bare number like ``"12.5"``.

    Returns:
        The value scaled to micromolar: ``mM`` values are multiplied by
        1000, ``nM`` values are divided by 1000, ``uM`` values are returned
        unchanged. A string without a recognised marker is parsed as-is
        (presumably already in micromolar; confirm against callers).

    Raises:
        ValueError: If the text left after removing the unit marker is not
            a valid float literal.
    """
    # Fold both Unicode spellings of "micro" into the ASCII form so that
    # they are handled by the same branch as "uM".
    text = concentration.replace("\u00b5M", "uM").replace("\u03bcM", "uM")

    if "uM" in text:
        return float(text.replace("uM", "").strip())
    if "mM" in text:
        return float(text.replace("mM", "").strip()) * 1000  # mM -> uM
    if "nM" in text:
        # No special case for zero is needed: 0 / 1000 is already 0.0.
        return float(text.replace("nM", "").strip()) / 1000  # nM -> uM
    return float(text.strip())
def split_unqcon_column(
    data: pd.DataFrame,
    fields: tuple[str, ...] = ("concentration", "ligand", "protein", "buffer"),
    delimiter: str = "_",
) -> pd.DataFrame:
    """
    Split the 'unqcond' column into its component parts.

    Each value of ``data["unqcond"]`` is handed to ``condition_from_string``
    (called with ``include_replicates=False``), and four attributes of the
    returned object are written to new columns of ``data``:

    * ``concentration`` <- ``.concentration``
    * ``ligand``        <- ``.ligand_name``
    * ``protein``       <- ``.protein_name``
    * ``buffer``        <- ``.buffer_condition``

    Rows whose condition string makes the parser raise ``ValueError`` are
    logged as a warning and receive empty strings in all four columns.

    Values are assigned by row position, so ``data`` may carry any index
    (filtered, re-ordered, non-unique) and may be empty.

    Args:
        data: DataFrame with an 'unqcond' column containing condition strings.
            It is modified in place.
        fields: Ordered tuple of field names forwarded to the parser; it
            describes the order of the components inside each condition
            string. Default: ("concentration", "ligand", "protein", "buffer").
            The set of output columns does not depend on this argument.
        delimiter: Separator forwarded to the parser.

    Returns:
        The same ``data`` object, with the four columns added or overwritten.

    Examples:
        >>> df = pd.DataFrame({"unqcond": ["500uM_ATP_Fic_buffer1"]})
        >>> # Default parsing
        >>> df = split_unqcon_column(df)
        >>> print(df[["concentration", "ligand", "protein", "buffer"]])

        >>> # Custom field order
        >>> df = split_unqcon_column(df, fields=("ligand", "protein", "concentration", "buffer"))
    """
    # Output column name -> attribute read from the parsed condition object.
    column_attrs = {
        "concentration": "concentration",
        "ligand": "ligand_name",
        "protein": "protein_name",
        "buffer": "buffer_condition",
    }
    parsed: dict[str, list] = {column: [] for column in column_attrs}

    for condition_str in data["unqcond"]:
        try:
            condition_obj = condition_from_string(
                condition_str, delimiter=delimiter, fields=fields, include_replicates=False
            )
            row = {
                column: getattr(condition_obj, attr)
                for column, attr in column_attrs.items()
            }
        except ValueError as e:
            # Best effort: keep the row, but leave every parsed field blank.
            logging.warning(f"Failed to parse condition '{condition_str}': {e}")
            row = dict.fromkeys(column_attrs, "")
        for column, value in row.items():
            parsed[column].append(value)

    # Plain lists are assigned positionally. Going through an intermediate
    # DataFrame/Series would align on index labels instead, which scrambles
    # values (or yields NaN) whenever `data` does not have a default
    # 0..n-1 index, and fails outright for an empty frame.
    for column, values in parsed.items():
        data[column] = values

    return data