Coverage for src / molecular_simulations / build / build_amber.py: 87%
103 statements
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-13 01:26 -0600
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-13 01:26 -0600
1#!/usr/bin/env python
2import logging
3from openmm.app import PDBFile
4import os
5from pathlib import Path
6import subprocess
7import tempfile
8from typing import Dict, List, Optional, Union
10PathLike = Union[str, Path]
11OptPath = Union[str, Path, None]
13logger = logging.getLogger(__name__)
15class ImplicitSolvent:
16 """
17 Class for building a system using ambertools. Produces explicit solvent cubic box
18 with user-specified padding which has been neutralized and ionized with 150mM NaCl.
19 """
20 def __init__(self,
21 path: OptPath,
22 pdb: str,
23 protein: bool=True,
24 rna: bool=False,
25 dna: bool=False,
26 phos_protein: bool=False,
27 mod_protein: bool=False,
28 out: OptPath=None,
29 delete_temp_file: bool=True,
30 amberhome: Optional[str]=None,
31 **kwargs):
32 if path is None:
33 self.path = Path(pdb).parent
34 elif isinstance(path, str):
35 self.path = Path(path)
36 else:
37 self.path = path
39 self.path = self.path.resolve()
40 self.path.mkdir(exist_ok=True, parents=True)
42 self.pdb = Path(pdb).resolve()
44 if out is not None:
45 self.out = self.path / out
46 else:
47 self.out = self.path / 'system.pdb'
49 self.out = self.out.resolve()
50 self.delete = delete_temp_file
52 if amberhome is None:
53 if 'AMBERHOME' in os.environ:
54 amberhome = os.environ['AMBERHOME']
55 else:
56 raise ValueError(f'AMBERHOME is not set in env vars!')
58 self.tleap = str(Path(amberhome) / 'bin' / 'tleap')
59 self.pdb4amber = str(Path(amberhome) / 'bin' / 'pdb4amber')
61 switches = [protein, rna, dna, phos_protein, mod_protein]
62 ffs = [
63 'leaprc.protein.ff19SB',
64 'leaprc.RNA.Shaw',
65 'leaprc.DNA.OL21',
66 'leaprc.phosaa19SB',
67 'leaprc.protein.ff14SB_modAA'
68 ]
70 self.ffs = [
71 ff for ff, switch in zip(ffs, switches) if switch
72 ]
74 for key, val in kwargs.items():
75 setattr(self, key, val)
77 def build(self) -> None:
78 """
79 Orchestrate the various things that need to happen in order
80 to produce an explicit solvent system. This includes running
81 `pdb4amber`, computing the periodic box size, number of ions
82 needed and running `tleap` to make the final system.
84 Returns:
85 None
86 """
87 logger.info('Build start: implicit solvent',
88 extra={'pdb': str(self.pdb), 'out': str(self.out)})
89 self.tleap_it()
90 logger.info('Build finished')
92 def tleap_it(self) -> None:
93 """
94 While more painful, tleap is more stable for system building.
95 Runs the input PDB through tleap with the FF19SB protein forcefield
96 and whichever other forcefields were turned on.
98 Returns:
99 None
100 """
101 ffs = '\n'.join([f'source {ff}' for ff in self.ffs])
102 tleap_in = f"""
103 {ffs}
104 prot = loadpdb {self.pdb}
105 set default pbradii mbondi3
106 savepdb prot {self.out}
107 saveamberparm prot {self.out.with_suffix('.prmtop')} {self.out.with_suffix('.inpcrd')}
108 quit
109 """
111 self.temp_tleap(tleap_in)
113 def write_leap(self,
114 inp: str) -> Path:
115 """
116 Writes out a tleap input file and returns the path
117 to the file.
119 Returns:
120 (Path): Path to tleap input file.
121 """
122 leap_file = f'{self.path}/tleap.in'
123 with open(leap_file, 'w') as outfile:
124 outfile.write(inp)
126 return Path(leap_file)
128 def temp_tleap(self,
129 inp: str) -> None:
130 """
131 Writes a temporary file for tleap and then runs tleap. This
132 makes handling parallel tleap runs much simpler as we are avoiding
133 the different workers overwriting tleap inputs and getting confused.
135 Arguments:
136 inp (str): The tleap input file contents as a single string.
138 Returns:
139 None
140 """
141 with tempfile.NamedTemporaryFile(mode='w+',
142 suffix='.in',
143 delete=self.delete,
144 dir=str(self.path)) as temp_file:
145 temp_file.write(inp)
146 temp_file.flush()
147 tleap_command = f'{self.tleap} -f {temp_file.name}'
148 subprocess.run(tleap_command, shell=True, cwd=str(self.path), check=True,
149 stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
151class ExplicitSolvent(ImplicitSolvent):
152 """
153 Class for building a system using ambertools. Produces explicit solvent cubic box
154 with user-specified padding which has been neutralized and ionized with 150mM NaCl.
155 """
156 def __init__(self,
157 path: PathLike,
158 pdb: PathLike,
159 padding: float=10.,
160 protein: bool=True,
161 rna: bool=False,
162 dna: bool=False,
163 phos_protein: bool=False,
164 mod_protein: bool=False,
165 polarizable: bool=False,
166 delete_temp_file: bool=True,
167 amberhome: Optional[str]=None,
168 **kwargs):
169 super().__init__(path=path,
170 pdb=pdb,
171 protein=protein,
172 rna=rna,
173 dna=dna,
174 phos_protein=phos_protein,
175 mod_protein=mod_protein,
176 out=None,
177 delete_temp_file=delete_temp_file,
178 amberhome=amberhome,
179 **kwargs)
180 self.pad = padding
181 self.ffs.extend(['leaprc.water.opc'])
182 self.water_box = 'OPCBOX'
184 if polarizable:
185 self.ffs[0] = 'leaprc.protein.ff15ipq'
186 self.ffs[-1] = 'leaprc.water.spceb'
187 self.water_box = 'SPCBOX'
189 def build(self) -> None:
190 """
191 Orchestrate the various things that need to happen in order
192 to produce an explicit solvent system. This includes running
193 `pdb4amber`, computing the periodic box size, number of ions
194 needed and running `tleap` to make the final system.
196 Returns:
197 None
198 """
199 logger.info('Build started: explicit solvent',
200 extra={'pdb': str(self.pdb), 'out': str(self.out)})
201 self.prep_pdb()
202 dim = self.get_pdb_extent()
203 num_ions = self.get_ion_numbers(dim**3)
204 self.assemble_system(dim, num_ions)
205 self.clean_up_directory()
206 logger.info('Build finished')
208 def prep_pdb(self) -> None:
209 """
210 Runs input PDB through `pdb4amber` to ensure it is compliant enough
211 that tleap won't freak out on us later. Removes any explicit hydrogens
212 from the input structure to avoid name mismatches.
214 Returns:
215 None
216 """
217 cmd = f'{self.pdb4amber} -i {self.pdb} -o {self.path}/protein.pdb -y'
218 subprocess.run(cmd, shell=True, cwd=str(self.path), check=True,
219 stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
221 self.pdb = f'{self.path}/protein.pdb'
223 def assemble_system(self, dim: float, num_ions: int) -> None:
224 """
225 Build system in tleap.
227 Arguments:
228 dim (float): Longest dimension in Angstrom.
229 num_ions (int): Explicit number of ions to achieve 150mM.
231 Returns:
232 None
233 """
234 tleap_ffs = '\n'.join([f'source {ff}' for ff in self.ffs])
235 out_pdb = self.out
236 out_top = self.out.with_suffix('.prmtop')
237 out_coor = self.out.with_suffix('.inpcrd')
239 tleap_complex = f"""{tleap_ffs}
240 PROT = loadpdb {self.pdb}
242 setbox PROT centers
243 set PROT box {{{dim} {dim} {dim}}}
244 solvatebox PROT {self.water_box} {{0 0 0}}
246 addions PROT Na+ 0
247 addions PROT Cl- 0
249 addIonsRand PROT Na+ {num_ions} Cl- {num_ions}
251 savepdb PROT {out_pdb}
252 saveamberparm PROT {out_top} {out_coor}
253 quit
254 """
256 self.temp_tleap(tleap_complex)
258 def get_pdb_extent(self) -> int:
259 """
260 Identifies the longest axis of the protein in terms of X/Y/Z
261 projection. Not super accurate but likely good enough for determining
262 PBC box size. Returns longest axis length + 2 times the padding
263 to account for +/- padding.
265 Returns:
266 (int): Longest dimension with 2 times padding in Angstrom.
267 """
268 lines = [line for line in open(self.pdb).readlines() if 'ATOM' in line]
269 xs, ys, zs = [], [], []
271 for line in lines:
272 xs.append(float(line[30:38].strip()))
273 ys.append(float(line[38:46].strip()))
274 zs.append(float(line[46:54].strip()))
276 xtent = (max(xs) - min(xs))
277 ytent = (max(ys) - min(ys))
278 ztent = (max(zs) - min(zs))
280 return int(max([xtent, ytent, ztent]) + 2 * self.pad)
282 def clean_up_directory(self) -> None:
283 """
284 Remove leap log. This is placed wherever the script calling it
285 runs and likely will throw errors if multiple systems are
286 being iteratively built.
288 Returns:
289 None
290 """
291 (self.path / 'build').mkdir(exist_ok=True)
292 for f in self.path.glob('*'):
293 if not any([ext in f.name for ext in ['.prmtop', '.inpcrd', 'build']]):
294 f.rename(f.parent / 'build' / f.name)
296 @staticmethod
297 def get_ion_numbers(volume: float) -> int:
298 """
299 Returns the number of Chloride? ions required to achieve 150mM
300 concentration for a given volume. The number of Sodium counter
301 ions should be equivalent.
303 Arguments:
304 volume (float): Volume of box in cubic Angstrom.
306 Returns:
307 (int): Number of ions for 150mM NaCl.
308 """
309 return round(volume * 10e-6 * 9.03)