Coverage for src / molecular_simulations / analysis / fingerprinter.py: 34%

141 statements  

« prev     ^ index     » next       coverage.py v7.13.0, created at 2025-12-12 10:07 -0600

1import openmm 

2from openmm.app import AmberPrmtopFile 

3import MDAnalysis as mda 

4from numba import njit 

5import numpy as np 

6from pathlib import Path 

7from typing import Union 

8 

9OptPath = Union[Path, str, None] 

10PathLike = Union[Path, str] 

11 

12@njit 

13def unravel_index(n1: int, 

14 n2: int) -> tuple[np.ndarray, np.ndarray]: 

15 """ 

16  

17 """ 

18 a, b = np.empty((n1, n2), dtype=np.int32), np.empty((n1, n2), dtype=np.int32) 

19 for i in range(n1): 

20 for j in range(n2): 

21 a[i,j], b[i,j] = i, j 

22 return a.ravel(),b.ravel() 

23 

24@njit 

25def _dist_mat(xyz1: np.ndarray, 

26 xyz2: np.ndarray) -> np.ndarray: 

27 """ 

28 

29 """ 

30 n1 = xyz1.shape[0] 

31 n2 = xyz2.shape[0] 

32 ndim = xyz1.shape[1] 

33 dist_mat = np.zeros((n1 * n2)) 

34 i, j = unravel_index(n1, n2) 

35 for k in range(n1 * n2): 

36 dr = xyz1[i[k]] - xyz2[j[k]] 

37 for ri in range(ndim): 

38 dist_mat[k] += np.square(dr[ri]) 

39 return np.sqrt(dist_mat) 

40 

41@njit 

42def dist_mat(xyz1: np.ndarray, 

43 xyz2: np.ndarray) -> np.ndarray: 

44 n1 = xyz1.shape[0] 

45 n2 = xyz2.shape[0] 

46 return _dist_mat(xyz1, xyz2).reshape(n1, n2) 

47 

48@njit 

49def electrostatic(distance, 

50 charge_i, 

51 charge_j): 

52 """ 

53 Calculate electrostatic energy between two particles. 

54 Cutoff at 12 Angstrom without switching. 

55 

56 Parameters: 

57 distance (float): distance between particles i and j (nm) 

58 charge_i (float): charge of particle i (e-) 

59 charge_j (float): charge of particle j (e-) 

60 

61 Returns: 

62 energy (float): Electrostatic energy between particles (kJ/mol) 

63 """ 

64 # conversion factors: 

65 # Avogadro = 6.022e23 molecules/mol 

66 # e- to Coloumb = 1.602e-19 C/e- 

67 # nm to m = 1e-9 m/nm 

68 # 1/(4\pi\epsilon_0) = 8.988e9 J*m/C^2 

69 

70 solvent_dielectric = 78.5 

71 # calculate energy 

72 if distance > 1.: 

73 energy = 0. 

74 else: 

75 r = distance * 1e-9 

76 r_cutoff = 1. * 1e-9 

77 k_rf = 1 / (r_cutoff ** 3) * (solvent_dielectric - 1) / (2 * solvent_dielectric + 1) 

78 c_rf = 1 / r_cutoff * (3 * solvent_dielectric) / (2 * solvent_dielectric + 1) 

79 

80 outer_term = 8.988e9 * (charge_i * 1.602e-19) * (charge_j * 1.602e-19) 

81 energy = outer_term * (1 / r + k_rf * r ** 2 - c_rf) * 6.022e23 

82 return energy / 1000 # J -> kJ 

83 

84@njit 

85def electrostatic_sum(distances, 

86 charge_is, 

87 charge_js): 

88 """ 

89 Calculate sum of all electrostatic interactions between two 

90 sets of particles. 

91 

92 Parameters: 

93 distances (np.ndarray): distances between particles, 

94 shape: (len(charge_is),len(charge_js)) 

95 charge_is (np.ndarray): group i charges 

96 charge_js (np.ndarray): group j charges 

97 """ 

98 n = distances.shape[0] 

99 m = distances.shape[1] 

100 

101 energy = 0. 

102 for i in range(n): 

103 for j in range(m): 

104 energy += electrostatic(distances[i,j], 

105 charge_is[i], 

106 charge_js[j]) 

107 return energy 

108 

109@njit 

110def lennard_jones(distance, 

111 sigma_i, 

112 sigma_j, 

113 epsilon_i, 

114 epsilon_j): 

115 """ 

116 Calculate LJ energy between two particles. 

117 Cutoff at 12 Angstrom without switching. 

118 

119 Parameters 

120 ---------- 

121 distance (float): distance between particles i and j (nm) 

122 sigma_i (float): sigma parameter for particle i (nm) 

123 sigma_j (float): sigma parameter for particle j (nm) 

124 epsilon_i (float): epsilon parameter for particle i (kJ/mol) 

125 epsilon_j (float): epsilon parameter for particle j (kJ/mol) 

126 

127 Returns: energy (float): LJ interaction energy (kJ/mol) 

128 """ 

129 if distance > 1.2: 

130 energy = 0. 

131 else: 

132 # use combination rules to solve for epsilon and sigma 

133 sigma_ij = 0.5 * (sigma_i + sigma_j) 

134 epsilon_ij = np.sqrt(epsilon_i * epsilon_j) 

135 

136 # calculate energy 

137 sigma_r = sigma_ij / distance 

138 sigma_r_6 = sigma_r ** 6 

139 sigma_r_12 = sigma_r_6 ** 2 

140 energy = 4. * epsilon_ij * (sigma_r_12 - sigma_r_6) 

141 return energy 

142 

143@njit 

144def lennard_jones_sum(distances, 

145 sigma_is, 

146 sigma_js, 

147 epsilon_is, 

148 epsilon_js): 

149 """ 

150 Calculate sum of all LJ interactions between two sets of 

151 particles.  

152 

153 Parameters: 

154 distances (np.ndarray): distances between particles,  

155 shape: (len(sigma_is),len(sigma_js)) 

156 sigma_is (np.ndarray): group i sigma parameters 

157 sigma_js (np.ndarray): group j sigma parameters 

158 epsilon_is (np.ndarray): group i epsilon parameters 

159 epsilon_js (np.ndarray): group j epsilon parameters 

160 """ 

161 n = distances.shape[0] 

162 m = distances.shape[1] 

163 energy = 0. 

164 for i in range(n): 

165 for j in range(m): 

166 energy += lennard_jones(distances[i,j], 

167 sigma_is[i], sigma_js[j], 

168 epsilon_is[i], epsilon_js[j]) 

169 return energy 

170 

171@njit 

172def fingerprints(xyzs, 

173 charges, 

174 sigmas, 

175 epsilons, 

176 target_resmap, 

177 binder_inds): 

178 """ 

179 Calculates electrostatic fingerprint. 

180 ES energy between each target residue and all binder residues. 

181 

182 Returns: 

183 fingerprints: (np.ndarray, np.ndarray),  

184 shape=(n_target_residues, n_target_residues) 

185 """ 

186 n_target_residues = len(target_resmap) 

187 es_fingerprint = np.zeros((n_target_residues)) 

188 lj_fingerprint = np.zeros((n_target_residues)) 

189 for i in range(n_target_residues): 

190 dists = dist_mat(xyzs[target_resmap[i]], xyzs[binder_inds]) 

191 es_fingerprint[i] = electrostatic_sum(dists, 

192 charges[target_resmap[i]], 

193 charges[binder_inds]) 

194 lj_fingerprint[i] = lennard_jones_sum(dists, 

195 sigmas[target_resmap[i]], 

196 sigmas[binder_inds], 

197 epsilons[target_resmap[i]], 

198 epsilons[binder_inds]) 

199 return lj_fingerprint, es_fingerprint 

200 

201class Fingerprinter: 

202 """ 

203 Calculates interaction energy fingerprint between target and binder chains.  

204  

205 Arguments: 

206 topology (PathLike): Path to topology file. 

207 trajectory (OptPath): Defaults to None. If not None, should be a path to a 

208 trajectory file or coordinates file. 

209 target_selection (str): Defaults to 'segid A'. Any MDAnalysis selection 

210 string that encompasses target. 

211 binder_selection (str | None): Defaults to None. If None, binder is defined 

212 as anything that is not the target. Otherwise should be an MDAnalysis 

213 selection string that encompasses the binder. 

214 out_path (OptPath): Defaults to None. If provided will be where outputs are 

215 saved to, otherwise the topology parent path is used. 

216 out_name (str | None): Defaults to None. If None, output file will be called 

217 'fingerprint.npz'. 

218  

219 Usage: 

220 m = Fingerprinter(*args) 

221 m.run() 

222 m.save() 

223 """ 

224 def __init__(self, 

225 topology: PathLike, 

226 trajectory: OptPath=None, 

227 target_selection: str = 'segid A', 

228 binder_selection: str | None = None, 

229 out_path: OptPath = None, 

230 out_name: str | None = None): 

231 self.topology = Path(topology) 

232 self.trajectory = Path(trajectory) if trajectory is not None else trajectory 

233 self.target_selection = target_selection 

234 

235 if binder_selection is not None: 

236 self.binder_selection = binder_selection 

237 else: 

238 self.binder_selection = f'not {target_selection}' 

239 

240 if out_path is None: 

241 path = self.topology.parent 

242 else: 

243 path = Path(out_path) 

244 

245 if out_name is None: 

246 self.out = path / 'fingerprint.npz' 

247 else: 

248 self.out = path / out_name 

249 

250 def assign_nonbonded_params(self) -> None: 

251 # build openmm system 

252 system = AmberPrmtopFile(self.topology).createSystem() 

253 

254 # extract NB params 

255 nonbonded = [f for f in system.getForces() if isinstance(f, openmm.NonbondedForce)][0] 

256 self.epsilons = np.zeros((system.getNumParticles())) 

257 self.sigmas = np.zeros((system.getNumParticles())) 

258 self.charges = np.zeros((system.getNumParticles())) 

259 for ind in range(system.getNumParticles()): 

260 charge, sigma, epsilon = nonbonded.getParticleParameters(ind) 

261 self.charges[ind] = charge / charge.unit # elementary charge 

262 self.sigmas[ind] = sigma / sigma.unit # nm 

263 self.epsilons[ind] = epsilon / epsilon.unit # kJ/mol 

264 

265 def load_pdb(self) -> None: 

266 """ 

267 Loads topology into MDAnalysis universe object. Can take either a PDB 

268 or an AMBER prmtop. If trajectory was not specified, will look for either 

269 inpcrd or rst7 with the same path and stem as the topology file. 

270 

271 Returns: 

272 None 

273 """ 

274 if self.topology.suffix == '.pdb': 

275 self.u = mda.Universe(self.topology) 

276 else: 

277 if self.trajectory is not None: 

278 coordinates = self.trajectory 

279 elif self.topology.with_suffix('.inpcrd').exists(): 

280 coordinates = self.topology.with_suffix('.inpcrd') 

281 else: 

282 coordinates = self.topology.with_suffix('.rst7') 

283 

284 self.u = mda.Universe(self.topology, coordinates) 

285 

286 def assign_residue_mapping(self) -> None: 

287 """ 

288 Map each residue index (1-based) to corresponding atom indices. 

289 

290 Returns: 

291 None 

292 """ 

293 target = self.u.select_atoms(self.target_selection) 

294 self.target_resmap = [residue.atoms.ix for residue in target.residues] 

295 self.target_inds = np.concatenate(self.target_resmap) 

296 

297 

298 binder = self.u.select_atoms(self.binder_selection) 

299 self.binder_resmap = [residue.atoms.ix for residue in binder.residues] 

300 self.binder_inds = np.concatenate(self.binder_resmap) 

301 

302 def iterate_frames(self) -> None: 

303 """ 

304 Runs calculations over each frame. 

305 

306 Returns: 

307 None 

308 """ 

309 self.target_fingerprint = np.zeros(( 

310 len(self.u.trajectory), len(self.target_resmap), 2 

311 )) 

312 

313 self.binder_fingerprint = np.zeros(( 

314 len(self.u.trajectory), len(self.binder_resmap), 2 

315 )) 

316 

317 for i, ts in enumerate(self.u.trajectory): 

318 self.calculate_fingerprints(i) 

319 

320 def calculate_fingerprints(self, 

321 frame_index: int) -> None: 

322 """ 

323 Calculates fingerprints for a given frame of the trajectory. 

324 

325 Arguments: 

326 frame_index (int): Frame index, since frame number and index might be 

327 discontinuous. 

328 Returns: 

329 None 

330 """ 

331 positions = self.u.atoms.positions * .1 # convert to nm 

332 self.target_fingerprint[frame_index] = np.vstack( 

333 fingerprints( 

334 positions, 

335 self.charges, 

336 self.sigmas, self.epsilons, 

337 self.target_resmap, self.binder_inds 

338 ) 

339 ).T 

340 

341 self.binder_fingerprint[frame_index] = np.vstack( 

342 fingerprints( 

343 positions, 

344 self.charges, 

345 self.sigmas, self.epsilons, 

346 self.binder_resmap, self.target_inds 

347 ) 

348 ).T 

349 

350 def run(self) -> None: 

351 """ 

352 Main logic. Obtains parameters, loads PDB and then 

353 iterates through trajectory to obtain fingerprints. 

354 

355 Returns: 

356 None 

357 """ 

358 self.assign_nonbonded_params() 

359 self.load_pdb() 

360 self.assign_residue_mapping() 

361 self.iterate_frames() 

362 

363 def save(self) -> None: 

364 """ 

365 Saves data to an npz stack. 

366 

367 Returns: 

368 None 

369 """ 

370 np.savez(self.out, 

371 target=self.target_fingerprint, 

372 binder=self.binder_fingerprint)