1 """
2 ProMix: Take a protein structure ensemble and find a mixture of rigid
3 segments or a mixture of conformers. Writes K copies of the ensemble
4 (for segments) or K subsets of the ensemble (for conformers) as PDB
5 files, each superposed on different components.
6
7 Reference: Hirsch M, Habeck M. - Bioinformatics. 2008 Oct 1;24(19):2184-92
8 """
9
10 import sys
11 import numpy
12
13 import csb.apps
14 import csb.bio.structure
15
16 from csb.bio.io.wwpdb import LegacyStructureParser
17 from csb.statistics import mixtures
22
24
25 @property
28
30 cmd = csb.apps.ArgHandler(self.program, __doc__)
31
32 cmd.add_scalar_option('components', 'K', int, 'Number of components', -1)
33 cmd.add_scalar_option('type', 't', str, 'Type of mixture', 'segments', ('segments', 'conformers'))
34 cmd.add_positional_argument('infile', str, 'input PDB file')
35
36 return cmd
37
41
43
# Load the ensemble from the input PDB file, extract the C-alpha
# coordinates of every model and hand both to the routine matching the
# requested mixture type.
try:
    parser = LegacyStructureParser(self.args.infile)
    models = parser.models()
except Exception:
    # Catch Exception rather than using a bare "except:", so that
    # KeyboardInterrupt and SystemExit propagate instead of being
    # misreported as a parsing failure.
    self.exit('PDB file parsing failed', ExitCodes.IO_ERROR)

# A mixture needs several structures to compare.
# NOTE(review): self.exit is presumably terminating -- confirm.
if len(models) < 2:
    self.exit('PDB file contains only one model', ExitCodes.USAGE_ERROR)

ensemble = parser.parse_models(models)

# One coordinate list per model, restricted to 'CA' atoms.
# NOTE(review): meaning of the second argument (True) is not visible
# here -- confirm against list_coordinates.
X = numpy.array([model.list_coordinates(['CA'], True) for model in ensemble])

if self.args.type == 'segments':
    self.main_segments(ensemble, X)
elif self.args.type == 'conformers':
    self.main_conformers(ensemble, X)
else:
    raise ValueError('type must be "segments" or "conformers"')
63
def main_segments(self, ensemble, X):
    """
    Fit a segment mixture to X and write the ensemble once per segment.

    For every component k the models of `ensemble` are transformed in
    place using the k-th entry of the per-model `mixture.R` / `mixture.t`
    and the whole ensemble is written to ``promix_segment_<k+1>.pdb``.

    @param ensemble: the parsed models; transformed in place
    @param X: coordinate array handed to SegmentMixture.new
    """
    mixture = mixtures.SegmentMixture.new(X, self.args.components)
    self.log('Number of segments: {0}'.format(mixture.K))

    components = zip(mixture.sigma, mixture.w)

    for index, (sigma, weight) in enumerate(components):
        outfile = 'promix_segment_{0}.pdb'.format(index + 1)
        self.log(' {0}: sigma = {1:6.3f}, w = {2:.3f}, file = {3}'.format(index + 1, sigma, weight, outfile))

        for model, rotations, shifts in zip(ensemble, mixture.R, mixture.t):
            if index > 0:
                # Apply the previous component's (R, t) before moving on.
                # NOTE(review): presumably this reverts the transform of
                # the previous pass (assumes transform(R, t) maps
                # x -> R.x + t with orthogonal R) -- confirm.
                model.transform(rotations[index - 1], shifts[index - 1])

            # Transposed rotation and matching translation for component
            # `index`.
            inverse_rotation = rotations[index].T
            inverse_shift = -numpy.dot(inverse_rotation, shifts[index])
            model.transform(inverse_rotation, inverse_shift)

        ensemble.to_pdb(outfile)
81
83
# Fit a conformer mixture to X, then write one PDB file per component
# containing only the models whose membership equals that component.
mixture = mixtures.ConformerMixture.new(X, self.args.components)
self.log('Number of conformers: {0}'.format(mixture.K))

# One entry per model, compared against the component index below.
assignments = mixture.membership

for index, (sigma, weight) in enumerate(zip(mixture.sigma, mixture.w)):
    outfile = 'promix_conformer_{0}.pdb'.format(index + 1)
    self.log(' {0}: sigma = {1:6.3f}, w = {2:.3f}, file = {3}'.format(index + 1, sigma, weight, outfile))

    subset = csb.bio.structure.Ensemble()

    for model, rotations, shifts, assigned in zip(ensemble, mixture.R, mixture.t, assignments):
        if assigned == index:
            # Transposed rotation and matching translation for this
            # component; the model is transformed in place before it is
            # collected.
            inverse_rotation = rotations[index].T
            inverse_shift = -numpy.dot(inverse_rotation, shifts[index])
            model.transform(inverse_rotation, inverse_shift)
            subset.models.append(model)

    subset.to_pdb(outfile)
104
# Script entry point: build the runner from the raw command line and
# start it.
if __name__ == '__main__':
    runner = AppRunner(sys.argv)
    runner.run()
107
108
109