Coverage for src / molecular_simulations / build / build_interface.py: 28%
58 statements
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-12 10:07 -0600
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-12 10:07 -0600
1from .build_amber import ExplicitSolvent
2import MDAnalysis as mda
3import numpy as np
4from pathlib import Path
5from typing import Any, Union
6import yaml
# A filesystem location, accepted either as a ``pathlib.Path`` or a plain string.
PathLike = Union[Path, str]
# A free-form, string-keyed configuration mapping.
Config = dict[str, Any]
class InterfaceBuilder(ExplicitSolvent):
    """
    For a given target/binder pair, build systems for driving binding to
    each of the supplied interfaces using DeepDriveMD. Includes writing
    out the required yaml files for running DeepDrive.
    """

    def __init__(self,
                 path: PathLike,
                 pdb: str,
                 interfaces: Config,
                 target: PathLike,
                 binder: PathLike,
                 padding: float = 10.,
                 protein: bool = True,
                 rna: bool = False,
                 dna: bool = False,
                 polarizable: bool = False):
        """
        Load the target structure and record where outputs will be written.

        Args:
            path: Forwarded unchanged to ``ExplicitSolvent.__init__``.
            pdb: Forwarded unchanged to ``ExplicitSolvent.__init__``.
            interfaces: Mapping of site name to that site's data. Each
                site entry must provide a ``'contact_sel'`` string; see
                `parse_interface` for how the remaining values are used.
            target: Structure file of the target, loaded with MDAnalysis.
            binder: Structure file of the binder, loaded in `place_binder`.
            padding: Forwarded unchanged to ``ExplicitSolvent.__init__``.
            protein: Forwarded unchanged to ``ExplicitSolvent.__init__``.
            rna: Forwarded unchanged to ``ExplicitSolvent.__init__``.
            dna: Forwarded unchanged to ``ExplicitSolvent.__init__``.
            polarizable: Forwarded unchanged to ``ExplicitSolvent.__init__``.
        """
        super().__init__(path, pdb, padding, protein, rna, dna, polarizable)

        # Both arguments are annotated PathLike, so plain strings must work
        # too; normalise once so `.stem` is always available.
        target = Path(target)

        self.interfaces = interfaces
        self.target = mda.Universe(str(target)).select_atoms('all')
        self.binder = Path(binder)
        # `.stem` drops the extension whatever its length (the previous
        # `name[:-4]` was only right for three-letter extensions like .pdb).
        self.root = Path(self.path) / target.stem
        self.com = self.target.center_of_mass()

    def build_all(self) -> None:
        """
        Iterates through each interface site for a given target and
        builds the corresponding system for the supplied miniprotein
        binder.

        For every site this sets ``self.yaml_out``, ``self.out``,
        ``self.build_dir``, ``self.pdb`` and ``self.path`` before calling
        the inherited ``build()``, then writes both yaml files.
        """
        for site in self.interfaces:
            # set pathing for this target/binder/site
            self.yaml_out = self.root / site / self.binder.stem

            ddmd_dir = self.yaml_out / 'ddmd'
            ddmd_dir.mkdir(parents=True, exist_ok=True)
            self.out = ddmd_dir / 'system'

            self.build_dir = self.yaml_out / 'build'
            self.build_dir.mkdir(parents=True, exist_ok=True)
            # need full path to leverage parent methods
            self.pdb = self.build_dir / 'protein.pdb'

            cont_sel, dist_sel, vector, com, input_shape = \
                self.parse_interface(site)

            binder = self.place_binder(np.array(vector, dtype=np.float32),
                                       np.array(com, dtype=np.float32))
            self.merge_proteins(binder)

            # The inherited build() is run from the per-site build directory.
            self.path = self.build_dir
            self.build()

            self.write_ddmd_yaml(cont_sel, dist_sel)
            self.write_cvae_yaml(input_shape)

    def place_binder(self,
                     vector: np.ndarray,
                     com: np.ndarray) -> mda.AtomGroup:
        """
        Move binder nearby to the interface as defined by `vector`.

        Every binder coordinate is shifted by ``-com`` and then by
        ``+vector``.

        Args:
            vector: Translation applied after the ``com`` shift.
            com: Offset subtracted from every binder coordinate.

        Returns:
            An MDAnalysis AtomGroup holding all binder atoms at their
            new positions.
        """
        binder_atoms = mda.Universe(str(self.binder)).select_atoms('all')

        # NOTE(review): the binder's own center of mass used to be computed
        # here but was never used; the shift uses the `com` supplied by the
        # interface config instead. Confirm that is the intended origin.
        binder_atoms.positions -= com
        binder_atoms.positions += vector

        return binder_atoms

    def merge_proteins(self,
                       binder: mda.AtomGroup) -> None:
        """
        Merges the target and binder AtomGroups and writes out
        a unified PDB at `self.pdb` so as to leverage the existing
        pipeline for building explicit solvent systems.

        Args:
            binder: Binder atoms, as returned by `place_binder`.
        """
        merged_atoms = mda.Merge(self.target, binder)

        with mda.Writer(self.pdb) as W:
            W.write(merged_atoms)

    def parse_interface(self,
                        site: str = 'site0') -> list[Any]:
        """
        Returns the relevant data for the current interface site.

        The site's values are returned in the mapping's own order with the
        CVAE input shape appended. `build_all` unpacks the result as
        ``contact_sel, distance_sel, vector, com, inp_shape``, so the site
        entry must hold exactly those four values in that order.

        Args:
            site: Key of the interface site in ``self.interfaces``.

        Returns:
            A list of the site's values followed by the tuple
            ``(1, N, N)``, where N is the number of whitespace-separated
            tokens left in ``contact_sel`` after its first 18 characters.

        Raises:
            KeyError: If `site` or its ``'contact_sel'`` entry is missing.
        """
        entry = self.interfaces[site]

        # presumably the length of a fixed selection prefix that precedes
        # the residue list (e.g. "segid A and resid ") - TODO confirm
        prefix_len = 18
        n_tokens = len(entry['contact_sel'][prefix_len:].split())
        inp_shape = (1, n_tokens, n_tokens)

        parsed = list(entry.values())
        parsed.append(inp_shape)
        return parsed  # contact_sel, distance_sel, vector, com, inp_shape

    def write_ddmd_yaml(self,
                        contact_selection: str,
                        distance_selection: str) -> None:
        """
        Writes the simulation options yaml for a DeepDriveMD
        simulation.

        The file is written to ``self.yaml_out / 'prod.yaml'``; the
        worker init command changes directory to ``self.out``.

        Args:
            contact_selection: Stored as ``mda_selection``.
            distance_selection: Stored as ``distance_sels``.
        """
        yaml_settings = {
            'simulation_input_dir': 'ddmd',
            'num_workers': 4,
            'simulations_per_train': 6,
            'simulations_per_inference': 1,
            'num_total_simulations': 1000,
            'compute_settings': {
                'name': 'polaris',
                'num_nodes': 1,
                # Adjacent literals rather than a backslash continuation
                # inside the string, which leaked the source indentation
                # into the emitted command.
                'worker_init': (
                    'module use /soft/modulefiles; module load conda; '
                    f'conda activate deepdrive; cd {self.out}'
                ),
                'scheduler_options': '#PBS -l filesystems=home:eagle',
                'account': 'FoundEpidem',
                'queue': 'preemptable',
                'walltime': '72:00:00',
            },
            'simulation_settings': {
                'solvent_type': 'explicit',
                'dt_ps': 0.004,
                'mda_selection': contact_selection,
                'mda_selection_resid_list': None,
                'simulation_length_ns': 10,
                'report_interval_ps': 10,
                'temperature_kelvin': 300,
                'rmsd_reference_pdb': None,
                'distance_sels': distance_selection,
            },
            'train_settings': {
                'cvae_settings_yaml': 'cvae-prod-settings.yaml',
            },
        }

        with open(self.yaml_out / 'prod.yaml', 'w') as f:
            yaml.dump(yaml_settings, f)

    def write_cvae_yaml(self,
                        input_shape: list[int]) -> None:
        """
        Writes the CVAE options yaml for a DeepDriveMD
        simulation.

        The file is written to ``self.yaml_out / 'cvae-prod-settings.yaml'``.

        Args:
            input_shape: Shape stored as ``input_shape``. A tuple (as
                produced by `parse_interface`) is accepted and written
                as a plain yaml sequence.
        """
        yaml_settings = {
            # yaml.dump tags a tuple as `!!python/tuple`, which safe loaders
            # reject; a list serialises as an ordinary sequence.
            'input_shape': list(input_shape),
            'filters': [16, 16, 16, 16],
            'kernels': [3, 3, 3, 3],
            'strides': [1, 1, 1, 2],
            'affine_widths': [128],
            'affine_dropouts': [0.5],
            'latent_dim': 3,
            'lambda_rec': 1.0,
            'num_data_workers': 4,
            'prefetch_factor': 2,
            'batch_size': 64,
            'device': 'cuda',
            'optimizer_name': 'RMSprop',
            'optimizer_hparams': {
                'lr': 0.001,
                'weight_decay': 0.00001,
            },
            'epochs': 20,
            'checkpoint_log_every': 20,
            'plot_log_every': 20,
            'plot_n_samples': 5000,
            'plot_method': 'raw',
        }

        with open(self.yaml_out / 'cvae-prod-settings.yaml', 'w') as f:
            yaml.dump(yaml_settings, f)