Coverage for src / molecular_simulations / analysis / utils.py: 100%
50 statements
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-13 01:26 -0600
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-13 01:26 -0600
1import MDAnalysis as mda
2import numpy as np
3from pathlib import Path
4import shutil
5from typing import Callable, Union
# Optional filesystem location: a Path, a plain string path, or None when the
# caller wants the default location to be used.
OptPath = Union[Path, str, None]
class EmbedData:
    """
    Embeds given data into the beta-factor column of a PDB. Writes out to the
    same path as the input PDB and backs up the old PDB file, unless an output
    path is explicitly provided. Embedding data should be provided as a
    dictionary where the keys are MDAnalysis selection strings and the values
    are arrays that are iterated along their first axis, pairing one entry
    with each residue of the selection.

    Arguments:
        pdb (Path): Path to PDB file to load. Also will be the output if one is
            not provided.
        embedding_dict (dict[str, np.ndarray]): A dictionary containing MDAnalysis
            selections as keys and data as the values.
        out (OptPath): Defaults to None. If not None this will be the path to the
            output PDB. Strings are converted to Path objects.
    """
    def __init__(self,
                 pdb: Path,
                 embedding_dict: dict[str, np.ndarray],
                 out: OptPath=None):
        self.pdb = Path(pdb)
        self.embeddings = embedding_dict
        # OptPath allows plain strings, but write_new_pdb relies on Path
        # methods (``exists``), so normalise here rather than at every use.
        self.out = Path(out) if out is not None else self.pdb

        self.u = mda.Universe(str(self.pdb))

    def embed(self) -> None:
        """
        Unpacks embedding dictionary, embeds data and writes out new PDB.

        Returns:
            None
        """
        for sel, data in self.embeddings.items():
            self.embed_selection(sel, data)

        self.write_new_pdb()

    def embed_selection(self,
                        selection: str,
                        data: np.ndarray) -> None:
        """
        Embeds data into given selection in the beta column for each residue.
        Every atom of a residue receives that residue's value.

        Arguments:
            selection (str): MDAnalysis selection string.
            data (np.ndarray): Array of data to place in beta column. Shape should
                be (n_residues_in_selection, 1) or (n_residues_in_selection,).
                Residues and data entries are paired in order; pairing stops
                at whichever of the two is shorter.

        Returns:
            None
        """
        sel = self.u.select_atoms(selection)

        for residue, datum in zip(sel.residues, data):
            # Broadcast the single per-residue value over all atoms of the residue.
            residue.atoms.tempfactors = np.full(residue.atoms.tempfactors.shape, datum)

    def write_new_pdb(self) -> None:
        """
        Writes out PDB file. If the output path already exists (as it does when
        no separate output was designated and the input is overwritten), the
        input PDB is first copied to a backup with the extension .orig.pdb.
        If this backup already exists, do not back up the PDB as that may occur
        if you run this twice and to do so would mean losing the actual
        original PDB.

        Returns:
            None
        """
        backup = self.pdb.with_suffix('.orig.pdb')
        if self.out.exists() and not backup.exists():
            shutil.copyfile(str(self.pdb), str(backup))

        with mda.Writer(str(self.out)) as W:
            W.write(self.u.atoms)
class EmbedEnergyData(EmbedData):
    """
    Special instance of EmbedData in which the data stored in embedding_dict is
    non-bonded energy data with both LJ and coulombic terms. In this case we need
    to obtain the total energy by summing these and rescale it as many softwares
    do not understand a negative beta factor.

    Arguments:
        pdb (Path): Path to PDB file to load. Also will be the output if one is
            not provided.
        embedding_dict (dict[str, np.ndarray]): A dictionary containing MDAnalysis
            selections as keys and data as the values. Values may have shape
            (n_frames, n_residues, n_terms), (n_residues, n_terms) or
            (n_residues,).
        out (OptPath): Defaults to None. If not None this will be the path to the
            output PDB.
    """
    def __init__(self,
                 pdb: Path,
                 embedding_dict: dict[str, np.ndarray],
                 out: OptPath=None):
        super().__init__(pdb, embedding_dict, out)
        # Replace the raw energy arrays with per-residue, rescaled values so
        # the parent embedding methods can consume them unchanged.
        self.embeddings = self.preprocess()

    def preprocess(self) -> dict[str, np.ndarray]:
        """
        Processes embeddings data so that it can be fed through parent methods.
        Every array is reduced to one value per residue and then divided by the
        global minimum taken over all selections, so the most negative energy
        maps to 1. Results are capped at 1.

        NOTE(review): with a negative minimum, positive energies map to
        negative values; with a positive minimum every value is capped to 1.
        Confirm whether a lower clip at 0 is wanted for viewers that reject
        negative beta factors.

        Returns:
            (dict[str, np.ndarray]): Processed data arrays keyed by selection.
                Empty if there are no embeddings.

        Raises:
            ValueError: If the global minimum is exactly zero, since rescaling
                would divide by zero and put NaN/inf in the beta column.
        """
        # Sanitize each array once and reuse it for both the global minimum
        # and the rescaling pass.
        sanitized = {sel: self.sanitize_data(data)
                     for sel, data in self.embeddings.items()}
        if not sanitized:
            return {}

        rescaling_factor = np.min(
            np.concatenate([np.ravel(arr) for arr in sanitized.values()])
        )
        if rescaling_factor == 0:
            raise ValueError(
                'Cannot rescale energy data: the minimum energy is zero.'
            )

        return {sel: np.minimum(arr / rescaling_factor, 1.)
                for sel, arr in sanitized.items()}

    @staticmethod
    def sanitize_data(data: np.ndarray) -> np.ndarray:
        """
        Takes in data of shape (n_frames, n_residues, n_terms) and
        returns one-dimensional array of shape (n_residues,) by
        first averaging in the first dimension and then summing in
        the new second dimension - originally the third dimension.
        Data of shape (n_residues, n_terms) skips the averaging step and
        one-dimensional data is returned as is.

        Arguments:
            data (np.ndarray): Unprocessed input data.

        Returns:
            (np.ndarray): One-dimensional processed data.
        """
        data = np.asarray(data)

        if data.ndim > 2:
            data = np.mean(data, axis=0)

        # Summing also collapses a single-term column, so the result is
        # per-residue values; 1-D input has no term axis to sum over.
        if data.ndim >= 2:
            data = np.sum(data, axis=1)

        return data