Coverage for src / molecular_simulations / build / build_amber.py: 87%

103 statements  

« prev     ^ index     » next       coverage.py v7.13.0, created at 2025-12-13 01:26 -0600

1#!/usr/bin/env python 

2import logging 

3from openmm.app import PDBFile 

4import os 

5from pathlib import Path 

6import subprocess 

7import tempfile 

8from typing import Dict, List, Optional, Union 

9 

10PathLike = Union[str, Path] 

11OptPath = Union[str, Path, None] 

12 

13logger = logging.getLogger(__name__) 

14 

15class ImplicitSolvent: 

16 """ 

17 Class for building a system using ambertools. Produces explicit solvent cubic box 

18 with user-specified padding which has been neutralized and ionized with 150mM NaCl. 

19 """ 

20 def __init__(self, 

21 path: OptPath, 

22 pdb: str, 

23 protein: bool=True, 

24 rna: bool=False, 

25 dna: bool=False, 

26 phos_protein: bool=False, 

27 mod_protein: bool=False, 

28 out: OptPath=None, 

29 delete_temp_file: bool=True, 

30 amberhome: Optional[str]=None, 

31 **kwargs): 

32 if path is None: 

33 self.path = Path(pdb).parent 

34 elif isinstance(path, str): 

35 self.path = Path(path) 

36 else: 

37 self.path = path 

38 

39 self.path = self.path.resolve() 

40 self.path.mkdir(exist_ok=True, parents=True) 

41 

42 self.pdb = Path(pdb).resolve() 

43 

44 if out is not None: 

45 self.out = self.path / out 

46 else: 

47 self.out = self.path / 'system.pdb' 

48 

49 self.out = self.out.resolve() 

50 self.delete = delete_temp_file 

51 

52 if amberhome is None: 

53 if 'AMBERHOME' in os.environ: 

54 amberhome = os.environ['AMBERHOME'] 

55 else: 

56 raise ValueError(f'AMBERHOME is not set in env vars!') 

57 

58 self.tleap = str(Path(amberhome) / 'bin' / 'tleap') 

59 self.pdb4amber = str(Path(amberhome) / 'bin' / 'pdb4amber') 

60 

61 switches = [protein, rna, dna, phos_protein, mod_protein] 

62 ffs = [ 

63 'leaprc.protein.ff19SB', 

64 'leaprc.RNA.Shaw', 

65 'leaprc.DNA.OL21', 

66 'leaprc.phosaa19SB', 

67 'leaprc.protein.ff14SB_modAA' 

68 ] 

69 

70 self.ffs = [ 

71 ff for ff, switch in zip(ffs, switches) if switch 

72 ] 

73 

74 for key, val in kwargs.items(): 

75 setattr(self, key, val) 

76 

77 def build(self) -> None: 

78 """ 

79 Orchestrate the various things that need to happen in order 

80 to produce an explicit solvent system. This includes running 

81 `pdb4amber`, computing the periodic box size, number of ions 

82 needed and running `tleap` to make the final system. 

83 

84 Returns: 

85 None 

86 """ 

87 logger.info('Build start: implicit solvent', 

88 extra={'pdb': str(self.pdb), 'out': str(self.out)}) 

89 self.tleap_it() 

90 logger.info('Build finished') 

91 

92 def tleap_it(self) -> None: 

93 """ 

94 While more painful, tleap is more stable for system building. 

95 Runs the input PDB through tleap with the FF19SB protein forcefield 

96 and whichever other forcefields were turned on. 

97 

98 Returns: 

99 None 

100 """ 

101 ffs = '\n'.join([f'source {ff}' for ff in self.ffs]) 

102 tleap_in = f""" 

103 {ffs} 

104 prot = loadpdb {self.pdb} 

105 set default pbradii mbondi3 

106 savepdb prot {self.out} 

107 saveamberparm prot {self.out.with_suffix('.prmtop')} {self.out.with_suffix('.inpcrd')} 

108 quit 

109 """ 

110 

111 self.temp_tleap(tleap_in) 

112 

113 def write_leap(self, 

114 inp: str) -> Path: 

115 """ 

116 Writes out a tleap input file and returns the path 

117 to the file. 

118 

119 Returns: 

120 (Path): Path to tleap input file. 

121 """ 

122 leap_file = f'{self.path}/tleap.in' 

123 with open(leap_file, 'w') as outfile: 

124 outfile.write(inp) 

125 

126 return Path(leap_file) 

127 

128 def temp_tleap(self, 

129 inp: str) -> None: 

130 """ 

131 Writes a temporary file for tleap and then runs tleap. This 

132 makes handling parallel tleap runs much simpler as we are avoiding 

133 the different workers overwriting tleap inputs and getting confused. 

134 

135 Arguments: 

136 inp (str): The tleap input file contents as a single string. 

137 

138 Returns: 

139 None 

140 """ 

141 with tempfile.NamedTemporaryFile(mode='w+', 

142 suffix='.in', 

143 delete=self.delete, 

144 dir=str(self.path)) as temp_file: 

145 temp_file.write(inp) 

146 temp_file.flush() 

147 tleap_command = f'{self.tleap} -f {temp_file.name}' 

148 subprocess.run(tleap_command, shell=True, cwd=str(self.path), check=True, 

149 stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) 

150 

151class ExplicitSolvent(ImplicitSolvent): 

152 """ 

153 Class for building a system using ambertools. Produces explicit solvent cubic box 

154 with user-specified padding which has been neutralized and ionized with 150mM NaCl. 

155 """ 

156 def __init__(self, 

157 path: PathLike, 

158 pdb: PathLike, 

159 padding: float=10., 

160 protein: bool=True, 

161 rna: bool=False, 

162 dna: bool=False, 

163 phos_protein: bool=False, 

164 mod_protein: bool=False, 

165 polarizable: bool=False, 

166 delete_temp_file: bool=True, 

167 amberhome: Optional[str]=None, 

168 **kwargs): 

169 super().__init__(path=path, 

170 pdb=pdb, 

171 protein=protein, 

172 rna=rna, 

173 dna=dna, 

174 phos_protein=phos_protein, 

175 mod_protein=mod_protein, 

176 out=None, 

177 delete_temp_file=delete_temp_file, 

178 amberhome=amberhome, 

179 **kwargs) 

180 self.pad = padding 

181 self.ffs.extend(['leaprc.water.opc']) 

182 self.water_box = 'OPCBOX' 

183 

184 if polarizable: 

185 self.ffs[0] = 'leaprc.protein.ff15ipq' 

186 self.ffs[-1] = 'leaprc.water.spceb' 

187 self.water_box = 'SPCBOX' 

188 

189 def build(self) -> None: 

190 """ 

191 Orchestrate the various things that need to happen in order 

192 to produce an explicit solvent system. This includes running 

193 `pdb4amber`, computing the periodic box size, number of ions 

194 needed and running `tleap` to make the final system. 

195 

196 Returns: 

197 None 

198 """ 

199 logger.info('Build started: explicit solvent', 

200 extra={'pdb': str(self.pdb), 'out': str(self.out)}) 

201 self.prep_pdb() 

202 dim = self.get_pdb_extent() 

203 num_ions = self.get_ion_numbers(dim**3) 

204 self.assemble_system(dim, num_ions) 

205 self.clean_up_directory() 

206 logger.info('Build finished') 

207 

208 def prep_pdb(self) -> None: 

209 """ 

210 Runs input PDB through `pdb4amber` to ensure it is compliant enough 

211 that tleap won't freak out on us later. Removes any explicit hydrogens 

212 from the input structure to avoid name mismatches. 

213 

214 Returns: 

215 None 

216 """ 

217 cmd = f'{self.pdb4amber} -i {self.pdb} -o {self.path}/protein.pdb -y' 

218 subprocess.run(cmd, shell=True, cwd=str(self.path), check=True, 

219 stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) 

220 

221 self.pdb = f'{self.path}/protein.pdb' 

222 

223 def assemble_system(self, dim: float, num_ions: int) -> None: 

224 """ 

225 Build system in tleap. 

226 

227 Arguments: 

228 dim (float): Longest dimension in Angstrom. 

229 num_ions (int): Explicit number of ions to achieve 150mM. 

230 

231 Returns: 

232 None 

233 """ 

234 tleap_ffs = '\n'.join([f'source {ff}' for ff in self.ffs]) 

235 out_pdb = self.out 

236 out_top = self.out.with_suffix('.prmtop') 

237 out_coor = self.out.with_suffix('.inpcrd') 

238 

239 tleap_complex = f"""{tleap_ffs} 

240 PROT = loadpdb {self.pdb} 

241  

242 setbox PROT centers 

243 set PROT box {{{dim} {dim} {dim}}} 

244 solvatebox PROT {self.water_box} {{0 0 0}} 

245  

246 addions PROT Na+ 0 

247 addions PROT Cl- 0 

248  

249 addIonsRand PROT Na+ {num_ions} Cl- {num_ions} 

250  

251 savepdb PROT {out_pdb} 

252 saveamberparm PROT {out_top} {out_coor} 

253 quit 

254 """ 

255 

256 self.temp_tleap(tleap_complex) 

257 

258 def get_pdb_extent(self) -> int: 

259 """ 

260 Identifies the longest axis of the protein in terms of X/Y/Z 

261 projection. Not super accurate but likely good enough for determining 

262 PBC box size. Returns longest axis length + 2 times the padding 

263 to account for +/- padding. 

264 

265 Returns: 

266 (int): Longest dimension with 2 times padding in Angstrom. 

267 """ 

268 lines = [line for line in open(self.pdb).readlines() if 'ATOM' in line] 

269 xs, ys, zs = [], [], [] 

270 

271 for line in lines: 

272 xs.append(float(line[30:38].strip())) 

273 ys.append(float(line[38:46].strip())) 

274 zs.append(float(line[46:54].strip())) 

275 

276 xtent = (max(xs) - min(xs)) 

277 ytent = (max(ys) - min(ys)) 

278 ztent = (max(zs) - min(zs)) 

279 

280 return int(max([xtent, ytent, ztent]) + 2 * self.pad) 

281 

282 def clean_up_directory(self) -> None: 

283 """ 

284 Remove leap log. This is placed wherever the script calling it 

285 runs and likely will throw errors if multiple systems are 

286 being iteratively built. 

287 

288 Returns: 

289 None 

290 """ 

291 (self.path / 'build').mkdir(exist_ok=True) 

292 for f in self.path.glob('*'): 

293 if not any([ext in f.name for ext in ['.prmtop', '.inpcrd', 'build']]): 

294 f.rename(f.parent / 'build' / f.name) 

295 

296 @staticmethod 

297 def get_ion_numbers(volume: float) -> int: 

298 """ 

299 Returns the number of Chloride? ions required to achieve 150mM 

300 concentration for a given volume. The number of Sodium counter 

301 ions should be equivalent. 

302 

303 Arguments: 

304 volume (float): Volume of box in cubic Angstrom. 

305 

306 Returns: 

307 (int): Number of ions for 150mM NaCl. 

308 """ 

309 return round(volume * 10e-6 * 9.03)