Coverage for src/instawell/utils/utils.py: 68%

28 statements  

« prev     ^ index     » next       coverage.py v7.11.0, created at 2025-11-07 15:47 -0600

1import logging 

2 

3import pandas as pd 

4 

5from instawell.core.parser import condition_from_string 

6 

7# Configure root logging when this module is imported: INFO level, "LEVEL: message" format (basicConfig is a no-op if the root logger already has handlers) 

8logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s") 

9 

10 

def convert_concentration_to_float(concentration: str) -> float:
    """Convert a concentration string to a float expressed in micromolar.

    Recognised unit markers are ``uM`` (also written with either Unicode
    micro sign, ``µM`` / ``μM``), ``mM`` and ``nM``. A string without any of
    these markers is parsed as a bare number and returned unchanged.

    Args:
        concentration: Text such as ``"500uM"``, ``"2 mM"``, ``"50nM"`` or
            ``"10"``. Surrounding whitespace is ignored.

    Returns:
        The numeric value: unchanged for ``uM`` and unit-less input,
        multiplied by 1000 for ``mM``, divided by 1000 for ``nM``.

    Raises:
        ValueError: If the remaining text (after removing the unit marker)
            is not a valid float literal.
    """
    # Fold both Unicode spellings of "micro" (U+00B5 MICRO SIGN and
    # U+03BC GREEK SMALL LETTER MU) into the ASCII form handled below.
    text = concentration.replace("\u00b5M", "uM").replace("\u03bcM", "uM")

    if "uM" in text:
        return float(text.replace("uM", "").strip())
    if "mM" in text:
        # mM -> uM
        return float(text.replace("mM", "").strip()) * 1000
    if "nM" in text:
        # nM -> uM; zero needs no special case because 0 / 1000 == 0.0
        return float(text.replace("nM", "").strip()) / 1000
    return float(text.strip())

24 

25 

def split_unqcon_column(
    data: pd.DataFrame,
    fields: tuple[str, ...] = ("concentration", "ligand", "protein", "buffer"),
    delimiter: str = "_",
) -> pd.DataFrame:
    """
    Split the 'unqcond' column into its component parts.

    Each value of ``data["unqcond"]`` is handed to ``condition_from_string``
    and the resulting object's ``concentration``, ``ligand_name``,
    ``protein_name`` and ``buffer_condition`` attributes are written to the
    columns ``concentration``, ``ligand``, ``protein`` and ``buffer``.

    Args:
        data: DataFrame with an 'unqcond' column containing condition strings.
            The frame is modified in place and also returned.
        fields: Ordered tuple of field names passed to the parser; it controls
            how each condition string is interpreted.
            Default: ("concentration", "ligand", "protein", "buffer").
            The output column names are always the four listed above,
            regardless of this order.
        delimiter: Separator passed to the parser. Default: "_".

    Returns:
        The same DataFrame object with the four parsed columns added (or
        overwritten). Rows whose condition string raises ``ValueError`` in
        the parser get empty strings in all four columns and a warning is
        logged.

    Examples:
        >>> df = pd.DataFrame({"unqcond": ["500uM_ATP_Fic_buffer1"]})
        >>> # Default parsing
        >>> df = split_unqcon_column(df)
        >>> print(df[["concentration", "ligand", "protein", "buffer"]])

        >>> # Custom field order
        >>> df = split_unqcon_column(df, fields=("ligand", "protein", "concentration", "buffer"))
    """
    output_columns = ["concentration", "ligand", "protein", "buffer"]

    # Parse each condition string
    parsed_conditions = []
    for condition_str in data["unqcond"]:
        try:
            condition_obj = condition_from_string(
                condition_str, delimiter=delimiter, fields=fields, include_replicates=False
            )
            # Map the parsed fields to their values
            parsed_conditions.append(
                {
                    "concentration": condition_obj.concentration,
                    "ligand": condition_obj.ligand_name,
                    "protein": condition_obj.protein_name,
                    "buffer": condition_obj.buffer_condition,
                }
            )
        except ValueError as e:
            # If parsing fails, use empty strings for all fields
            logging.warning("Failed to parse condition '%s': %s", condition_str, e)
            parsed_conditions.append(dict.fromkeys(output_columns, ""))

    # Build the parsed frame on data's own index: column assignment aligns on
    # index labels, so a fresh 0..n-1 index would misplace values whenever
    # `data` has been filtered or re-indexed. Passing `columns` explicitly
    # keeps the four columns present even when `data` has no rows.
    parsed_df = pd.DataFrame(parsed_conditions, columns=output_columns, index=data.index)
    for field in output_columns:
        data[field] = parsed_df[field]

    return data