Coverage for src / molecular_simulations / analysis / fingerprinter.py: 34%
141 statements
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-12 10:07 -0600
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-12 10:07 -0600
1import openmm
2from openmm.app import AmberPrmtopFile
3import MDAnalysis as mda
4from numba import njit
5import numpy as np
6from pathlib import Path
7from typing import Union
9OptPath = Union[Path, str, None]
10PathLike = Union[Path, str]
12@njit
13def unravel_index(n1: int,
14 n2: int) -> tuple[np.ndarray, np.ndarray]:
15 """
17 """
18 a, b = np.empty((n1, n2), dtype=np.int32), np.empty((n1, n2), dtype=np.int32)
19 for i in range(n1):
20 for j in range(n2):
21 a[i,j], b[i,j] = i, j
22 return a.ravel(),b.ravel()
24@njit
25def _dist_mat(xyz1: np.ndarray,
26 xyz2: np.ndarray) -> np.ndarray:
27 """
29 """
30 n1 = xyz1.shape[0]
31 n2 = xyz2.shape[0]
32 ndim = xyz1.shape[1]
33 dist_mat = np.zeros((n1 * n2))
34 i, j = unravel_index(n1, n2)
35 for k in range(n1 * n2):
36 dr = xyz1[i[k]] - xyz2[j[k]]
37 for ri in range(ndim):
38 dist_mat[k] += np.square(dr[ri])
39 return np.sqrt(dist_mat)
41@njit
42def dist_mat(xyz1: np.ndarray,
43 xyz2: np.ndarray) -> np.ndarray:
44 n1 = xyz1.shape[0]
45 n2 = xyz2.shape[0]
46 return _dist_mat(xyz1, xyz2).reshape(n1, n2)
48@njit
49def electrostatic(distance,
50 charge_i,
51 charge_j):
52 """
53 Calculate electrostatic energy between two particles.
54 Cutoff at 12 Angstrom without switching.
56 Parameters:
57 distance (float): distance between particles i and j (nm)
58 charge_i (float): charge of particle i (e-)
59 charge_j (float): charge of particle j (e-)
61 Returns:
62 energy (float): Electrostatic energy between particles (kJ/mol)
63 """
64 # conversion factors:
65 # Avogadro = 6.022e23 molecules/mol
66 # e- to Coloumb = 1.602e-19 C/e-
67 # nm to m = 1e-9 m/nm
68 # 1/(4\pi\epsilon_0) = 8.988e9 J*m/C^2
70 solvent_dielectric = 78.5
71 # calculate energy
72 if distance > 1.:
73 energy = 0.
74 else:
75 r = distance * 1e-9
76 r_cutoff = 1. * 1e-9
77 k_rf = 1 / (r_cutoff ** 3) * (solvent_dielectric - 1) / (2 * solvent_dielectric + 1)
78 c_rf = 1 / r_cutoff * (3 * solvent_dielectric) / (2 * solvent_dielectric + 1)
80 outer_term = 8.988e9 * (charge_i * 1.602e-19) * (charge_j * 1.602e-19)
81 energy = outer_term * (1 / r + k_rf * r ** 2 - c_rf) * 6.022e23
82 return energy / 1000 # J -> kJ
84@njit
85def electrostatic_sum(distances,
86 charge_is,
87 charge_js):
88 """
89 Calculate sum of all electrostatic interactions between two
90 sets of particles.
92 Parameters:
93 distances (np.ndarray): distances between particles,
94 shape: (len(charge_is),len(charge_js))
95 charge_is (np.ndarray): group i charges
96 charge_js (np.ndarray): group j charges
97 """
98 n = distances.shape[0]
99 m = distances.shape[1]
101 energy = 0.
102 for i in range(n):
103 for j in range(m):
104 energy += electrostatic(distances[i,j],
105 charge_is[i],
106 charge_js[j])
107 return energy
109@njit
110def lennard_jones(distance,
111 sigma_i,
112 sigma_j,
113 epsilon_i,
114 epsilon_j):
115 """
116 Calculate LJ energy between two particles.
117 Cutoff at 12 Angstrom without switching.
119 Parameters
120 ----------
121 distance (float): distance between particles i and j (nm)
122 sigma_i (float): sigma parameter for particle i (nm)
123 sigma_j (float): sigma parameter for particle j (nm)
124 epsilon_i (float): epsilon parameter for particle i (kJ/mol)
125 epsilon_j (float): epsilon parameter for particle j (kJ/mol)
127 Returns: energy (float): LJ interaction energy (kJ/mol)
128 """
129 if distance > 1.2:
130 energy = 0.
131 else:
132 # use combination rules to solve for epsilon and sigma
133 sigma_ij = 0.5 * (sigma_i + sigma_j)
134 epsilon_ij = np.sqrt(epsilon_i * epsilon_j)
136 # calculate energy
137 sigma_r = sigma_ij / distance
138 sigma_r_6 = sigma_r ** 6
139 sigma_r_12 = sigma_r_6 ** 2
140 energy = 4. * epsilon_ij * (sigma_r_12 - sigma_r_6)
141 return energy
143@njit
144def lennard_jones_sum(distances,
145 sigma_is,
146 sigma_js,
147 epsilon_is,
148 epsilon_js):
149 """
150 Calculate sum of all LJ interactions between two sets of
151 particles.
153 Parameters:
154 distances (np.ndarray): distances between particles,
155 shape: (len(sigma_is),len(sigma_js))
156 sigma_is (np.ndarray): group i sigma parameters
157 sigma_js (np.ndarray): group j sigma parameters
158 epsilon_is (np.ndarray): group i epsilon parameters
159 epsilon_js (np.ndarray): group j epsilon parameters
160 """
161 n = distances.shape[0]
162 m = distances.shape[1]
163 energy = 0.
164 for i in range(n):
165 for j in range(m):
166 energy += lennard_jones(distances[i,j],
167 sigma_is[i], sigma_js[j],
168 epsilon_is[i], epsilon_js[j])
169 return energy
171@njit
172def fingerprints(xyzs,
173 charges,
174 sigmas,
175 epsilons,
176 target_resmap,
177 binder_inds):
178 """
179 Calculates electrostatic fingerprint.
180 ES energy between each target residue and all binder residues.
182 Returns:
183 fingerprints: (np.ndarray, np.ndarray),
184 shape=(n_target_residues, n_target_residues)
185 """
186 n_target_residues = len(target_resmap)
187 es_fingerprint = np.zeros((n_target_residues))
188 lj_fingerprint = np.zeros((n_target_residues))
189 for i in range(n_target_residues):
190 dists = dist_mat(xyzs[target_resmap[i]], xyzs[binder_inds])
191 es_fingerprint[i] = electrostatic_sum(dists,
192 charges[target_resmap[i]],
193 charges[binder_inds])
194 lj_fingerprint[i] = lennard_jones_sum(dists,
195 sigmas[target_resmap[i]],
196 sigmas[binder_inds],
197 epsilons[target_resmap[i]],
198 epsilons[binder_inds])
199 return lj_fingerprint, es_fingerprint
201class Fingerprinter:
202 """
203 Calculates interaction energy fingerprint between target and binder chains.
205 Arguments:
206 topology (PathLike): Path to topology file.
207 trajectory (OptPath): Defaults to None. If not None, should be a path to a
208 trajectory file or coordinates file.
209 target_selection (str): Defaults to 'segid A'. Any MDAnalysis selection
210 string that encompasses target.
211 binder_selection (str | None): Defaults to None. If None, binder is defined
212 as anything that is not the target. Otherwise should be an MDAnalysis
213 selection string that encompasses the binder.
214 out_path (OptPath): Defaults to None. If provided will be where outputs are
215 saved to, otherwise the topology parent path is used.
216 out_name (str | None): Defaults to None. If None, output file will be called
217 'fingerprint.npz'.
219 Usage:
220 m = Fingerprinter(*args)
221 m.run()
222 m.save()
223 """
224 def __init__(self,
225 topology: PathLike,
226 trajectory: OptPath=None,
227 target_selection: str = 'segid A',
228 binder_selection: str | None = None,
229 out_path: OptPath = None,
230 out_name: str | None = None):
231 self.topology = Path(topology)
232 self.trajectory = Path(trajectory) if trajectory is not None else trajectory
233 self.target_selection = target_selection
235 if binder_selection is not None:
236 self.binder_selection = binder_selection
237 else:
238 self.binder_selection = f'not {target_selection}'
240 if out_path is None:
241 path = self.topology.parent
242 else:
243 path = Path(out_path)
245 if out_name is None:
246 self.out = path / 'fingerprint.npz'
247 else:
248 self.out = path / out_name
250 def assign_nonbonded_params(self) -> None:
251 # build openmm system
252 system = AmberPrmtopFile(self.topology).createSystem()
254 # extract NB params
255 nonbonded = [f for f in system.getForces() if isinstance(f, openmm.NonbondedForce)][0]
256 self.epsilons = np.zeros((system.getNumParticles()))
257 self.sigmas = np.zeros((system.getNumParticles()))
258 self.charges = np.zeros((system.getNumParticles()))
259 for ind in range(system.getNumParticles()):
260 charge, sigma, epsilon = nonbonded.getParticleParameters(ind)
261 self.charges[ind] = charge / charge.unit # elementary charge
262 self.sigmas[ind] = sigma / sigma.unit # nm
263 self.epsilons[ind] = epsilon / epsilon.unit # kJ/mol
265 def load_pdb(self) -> None:
266 """
267 Loads topology into MDAnalysis universe object. Can take either a PDB
268 or an AMBER prmtop. If trajectory was not specified, will look for either
269 inpcrd or rst7 with the same path and stem as the topology file.
271 Returns:
272 None
273 """
274 if self.topology.suffix == '.pdb':
275 self.u = mda.Universe(self.topology)
276 else:
277 if self.trajectory is not None:
278 coordinates = self.trajectory
279 elif self.topology.with_suffix('.inpcrd').exists():
280 coordinates = self.topology.with_suffix('.inpcrd')
281 else:
282 coordinates = self.topology.with_suffix('.rst7')
284 self.u = mda.Universe(self.topology, coordinates)
286 def assign_residue_mapping(self) -> None:
287 """
288 Map each residue index (1-based) to corresponding atom indices.
290 Returns:
291 None
292 """
293 target = self.u.select_atoms(self.target_selection)
294 self.target_resmap = [residue.atoms.ix for residue in target.residues]
295 self.target_inds = np.concatenate(self.target_resmap)
298 binder = self.u.select_atoms(self.binder_selection)
299 self.binder_resmap = [residue.atoms.ix for residue in binder.residues]
300 self.binder_inds = np.concatenate(self.binder_resmap)
302 def iterate_frames(self) -> None:
303 """
304 Runs calculations over each frame.
306 Returns:
307 None
308 """
309 self.target_fingerprint = np.zeros((
310 len(self.u.trajectory), len(self.target_resmap), 2
311 ))
313 self.binder_fingerprint = np.zeros((
314 len(self.u.trajectory), len(self.binder_resmap), 2
315 ))
317 for i, ts in enumerate(self.u.trajectory):
318 self.calculate_fingerprints(i)
320 def calculate_fingerprints(self,
321 frame_index: int) -> None:
322 """
323 Calculates fingerprints for a given frame of the trajectory.
325 Arguments:
326 frame_index (int): Frame index, since frame number and index might be
327 discontinuous.
328 Returns:
329 None
330 """
331 positions = self.u.atoms.positions * .1 # convert to nm
332 self.target_fingerprint[frame_index] = np.vstack(
333 fingerprints(
334 positions,
335 self.charges,
336 self.sigmas, self.epsilons,
337 self.target_resmap, self.binder_inds
338 )
339 ).T
341 self.binder_fingerprint[frame_index] = np.vstack(
342 fingerprints(
343 positions,
344 self.charges,
345 self.sigmas, self.epsilons,
346 self.binder_resmap, self.target_inds
347 )
348 ).T
350 def run(self) -> None:
351 """
352 Main logic. Obtains parameters, loads PDB and then
353 iterates through trajectory to obtain fingerprints.
355 Returns:
356 None
357 """
358 self.assign_nonbonded_params()
359 self.load_pdb()
360 self.assign_residue_mapping()
361 self.iterate_frames()
363 def save(self) -> None:
364 """
365 Saves data to an npz stack.
367 Returns:
368 None
369 """
370 np.savez(self.out,
371 target=self.target_fingerprint,
372 binder=self.binder_fingerprint)