Coverage for src / molecular_simulations / build / build_interface.py: 28%

58 statements  

« prev     ^ index     » next       coverage.py v7.13.0, created at 2025-12-12 10:07 -0600

1from .build_amber import ExplicitSolvent 

2import MDAnalysis as mda 

3import numpy as np 

4from pathlib import Path 

5from typing import Any, Union 

6import yaml 

7 

# A filesystem location given either as a pathlib.Path or as a plain string.
PathLike = Union[Path, str]

# Settings mapping with string keys and arbitrary values.
Config = dict[str, Any]

10 

class InterfaceBuilder(ExplicitSolvent):
    """Build one explicit-solvent target/binder system per interface site.

    For a given target/binder pair, build systems for driving binding to
    each of the supplied interfaces using DeepDriveMD. Includes writing
    out the required yaml files for running DeepDrive.
    """

    def __init__(self,
                 path: PathLike,
                 pdb: str,
                 interfaces: Config,
                 target: PathLike,
                 binder: PathLike,
                 padding: float = 10.,
                 protein: bool = True,
                 rna: bool = False,
                 dna: bool = False,
                 polarizable: bool = False):
        """Load the target and record where per-site output will be written.

        Args:
            path: Forwarded to ``ExplicitSolvent.__init__``. ``self.path`` is
                joined with the target name below to form the output root.
            pdb: Forwarded to ``ExplicitSolvent.__init__``.
            interfaces: Mapping of site name to that site's settings. Each
                entry must provide a ``'contact_sel'`` string; see
                ``parse_interface`` for how the remaining values are used.
            target: Structure file of the target, loaded with MDAnalysis.
            binder: Structure file of the binder. It is loaded on demand in
                ``place_binder``.
            padding, protein, rna, dna, polarizable: Forwarded unchanged to
                ``ExplicitSolvent.__init__``.
        """
        super().__init__(path, pdb, padding, protein, rna, dna, polarizable)
        self.interfaces = interfaces
        self.target = mda.Universe(target).select_atoms('all')
        # Normalise to Path so that plain-string inputs work everywhere the
        # file name is needed.
        self.binder = Path(binder)
        # `.stem` drops the extension whatever its length and also accepts
        # `str` inputs, which have no `.name` attribute.
        self.root = self.path / Path(target).stem
        self.com = self.target.center_of_mass()

    def build_all(self) -> None:
        """Build the system and write both yaml files for every site.

        Iterates through each interface site for a given target and builds
        the corresponding system for the supplied miniprotein binder.

        Side effects: creates ``<root>/<site>/<binder>/ddmd`` and
        ``.../build``, and rebinds ``self.yaml_out``, ``self.out``,
        ``self.build_dir``, ``self.pdb`` and ``self.path`` on every
        iteration. After the loop they refer to the last site processed.
        """
        # Loop invariant; Path() keeps this working if `self.binder` is a str.
        binder_name = Path(self.binder).stem

        for site in self.interfaces:
            # set pathing for this target/binder/site
            self.yaml_out = self.root / site / binder_name
            ddmd_dir = self.yaml_out / 'ddmd'
            ddmd_dir.mkdir(parents=True, exist_ok=True)
            self.out = ddmd_dir / 'system'
            self.build_dir = self.yaml_out / 'build'
            self.build_dir.mkdir(parents=True, exist_ok=True)
            # need full path to leverage parent methods
            self.pdb = self.build_dir / 'protein.pdb'

            cont_sel, dist_sel, vector, com, input_shape = \
                self.parse_interface(site)

            binder = self.place_binder(np.array(vector, dtype=np.float32),
                                       np.array(com, dtype=np.float32))
            self.merge_proteins(binder)

            # The inherited build() is run with `self.path` pointing at this
            # site's build directory. `self.root` was fixed in __init__, so
            # later iterations are unaffected by this reassignment.
            self.path = self.build_dir
            self.build()

            self.write_ddmd_yaml(cont_sel, dist_sel)
            self.write_cvae_yaml(input_shape)

    def place_binder(self,
                     vector: np.ndarray,
                     com: np.ndarray) -> mda.AtomGroup:
        """Load the binder and translate it by ``vector - com``.

        Move binder nearby to the interface as defined by `vector`.

        Args:
            vector: Offset added to every binder coordinate.
            com: Point subtracted from every binder coordinate first.

        Returns:
            An MDAnalysis AtomGroup holding all binder atoms at their new
            positions.
        """
        binder_atoms = mda.Universe(self.binder).select_atoms('all')

        # NOTE(review): the shift uses the caller-supplied `com`, not the
        # binder's own center of mass -- confirm that this is intended.
        binder_atoms.positions -= com
        binder_atoms.positions += vector

        return binder_atoms

    def merge_proteins(self,
                       binder: mda.AtomGroup) -> None:
        """Write target and binder together as one PDB at ``self.pdb``.

        Merges the target and binder AtomGroups and writes out a unified
        PDB so as to leverage the existing pipeline for building explicit
        solvent systems.

        Args:
            binder: Binder atoms, already moved into place.
        """
        merged = mda.Merge(self.target, binder)

        with mda.Writer(self.pdb) as writer:
            writer.write(merged)

    def parse_interface(self,
                        site: str = 'site0') -> list[Any]:
        """Return the relevant data for the given interface site.

        Args:
            site: Key into ``self.interfaces``.

        Returns:
            A list made of the site's values in their stored order, followed
            by the ``(1, N, N)`` input shape. ``build_all`` unpacks it as
            contact_sel, distance_sel, vector, com, inp_shape, so the site
            mapping has to hold exactly those four values in that order.

        Raises:
            KeyError: If ``site`` is unknown or has no ``'contact_sel'``.
        """
        settings = self.interfaces[site]
        # The first 18 characters of the contact selection are skipped and
        # the rest is counted as whitespace-separated tokens (presumably a
        # fixed selection prefix followed by residue ids -- TODO confirm).
        n_tokens = len(settings['contact_sel'][18:].split())

        parsed = list(settings.values())
        parsed.append((1, n_tokens, n_tokens))
        return parsed  # contact_sel, distance_sel, vector, com, inp_shape

    def write_ddmd_yaml(self,
                        contact_selection: str,
                        distance_selection: str) -> None:
        """Write the DeepDriveMD simulation options to ``prod.yaml``.

        The file is created in ``self.yaml_out``; ``self.out`` is embedded in
        the worker start-up command.

        Args:
            contact_selection: Stored under
                ``simulation_settings.mda_selection``.
            distance_selection: Stored under
                ``simulation_settings.distance_sels``.
        """
        # Adjacent literals instead of a backslash continuation inside the
        # string, so source indentation does not end up in the command.
        worker_init = ('module use /soft/modulefiles; module load conda; '
                       f'conda activate deepdrive; cd {self.out}')

        yaml_settings = {
            'simulation_input_dir': 'ddmd',
            'num_workers': 4,
            'simulations_per_train': 6,
            'simulations_per_inference': 1,
            'num_total_simulations': 1000,
            'compute_settings': {
                'name': 'polaris',
                'num_nodes': 1,
                'worker_init': worker_init,
                'scheduler_options': '#PBS -l filesystems=home:eagle',
                'account': 'FoundEpidem',
                'queue': 'preemptable',
                'walltime': '72:00:00',
            },
            'simulation_settings': {
                'solvent_type': 'explicit',
                'dt_ps': 0.004,
                'mda_selection': contact_selection,
                'mda_selection_resid_list': None,
                'simulation_length_ns': 10,
                'report_interval_ps': 10,
                'temperature_kelvin': 300,
                'rmsd_reference_pdb': None,
                'distance_sels': distance_selection,
            },
            'train_settings': {
                'cvae_settings_yaml': 'cvae-prod-settings.yaml',
            },
        }

        with open(self.yaml_out / 'prod.yaml', 'w') as f:
            yaml.dump(yaml_settings, f)

    def write_cvae_yaml(self,
                        input_shape: Union[list[int], tuple[int, ...]]
                        ) -> None:
        """Write the CVAE options to ``cvae-prod-settings.yaml``.

        The file is created in ``self.yaml_out``.

        Args:
            input_shape: Shape stored under ``input_shape``. A tuple is
                accepted and written as a plain list.
        """
        yaml_settings = {
            # yaml.dump tags tuples as `!!python/tuple`, which safe loaders
            # reject; a list is emitted as an ordinary YAML sequence.
            'input_shape': list(input_shape),
            'filters': [16, 16, 16, 16],
            'kernels': [3, 3, 3, 3],
            'strides': [1, 1, 1, 2],
            'affine_widths': [128],
            'affine_dropouts': [0.5],
            'latent_dim': 3,
            'lambda_rec': 1.0,
            'num_data_workers': 4,
            'prefetch_factor': 2,
            'batch_size': 64,
            'device': 'cuda',
            'optimizer_name': 'RMSprop',
            'optimizer_hparams': {
                'lr': 0.001,
                'weight_decay': 0.00001,
            },
            'epochs': 20,
            'checkpoint_log_every': 20,
            'plot_log_every': 20,
            'plot_n_samples': 5000,
            'plot_method': 'raw',
        }

        with open(self.yaml_out / 'cvae-prod-settings.yaml', 'w') as f:
            yaml.dump(yaml_settings, f)