1 """
2 Measure the precision and coverage of a fragment library stored in Rosetta
3 NNmake format.
4 """
5
6 import os
7 import multiprocessing
8 import matplotlib
9
10 import csb.apps
11
12 import csb.bio.io.wwpdb as wwpdb
13 import csb.bio.structure as structure
14 import csb.bio.fragments
15 import csb.bio.fragments.rosetta as rosetta
16 import csb.core
17 import csb.io.plots
24
26
27 @property
30
32
33 cpu = multiprocessing.cpu_count()
34 cmd = csb.apps.ArgHandler(self.program, __doc__)
35
36 cmd.add_scalar_option('pdb', 'p', str, 'the PDB database (a directory containing all PDB files)', required=True)
37 cmd.add_scalar_option('native', 'n', str, 'native structure of the target (PDB file)', required=True)
38 cmd.add_scalar_option('chain', 'c', str, 'chain identifier (if not specified, the first chain)', default=None)
39
40 cmd.add_scalar_option('top', 't', int, 'read top N fragments per position', default=25)
41 cmd.add_scalar_option('cpu', 'C', int, 'maximum degree of parallelism', default=cpu)
42 cmd.add_scalar_option('rmsd', 'r', float, 'RMSD cutoff for precision and coverage', default=1.5)
43 cmd.add_scalar_option('output', 'o', str, 'output directory', default='.')
44
45 cmd.add_boolean_option('save-structures', 's', 'create a PDB file for each fragment, superimposed over the native', default=False)
46
47 cmd.add_positional_argument('library', str, 'Fragment library file in Rosetta NNmake format')
48
49 return cmd
50
52
54
55 if not os.path.isdir(self.args.output):
56 PrecisionApp.exit('Output directory does not exist', code=ExitCodes.INVALID_DATA, usage=True)
57
58 for file in [self.args.native, self.args.library]:
59 if not os.path.isfile(file):
60 PrecisionApp.exit('File not found: {0}'.format(file), code=ExitCodes.INVALID_DATA, usage=True)
61
62 self.log('\nLoading {0.library} (top {0.top} per position)... '.format(self.args), ending='')
63
64 try:
65 library = rosetta.RosettaFragmentMap.read(self.args.library, top=self.args.top)
66 self.log('{0} fragments'.format(len(library)))
67
68 try:
69 native = wwpdb.RegularStructureParser(self.args.native).parse_structure()
70 native.accession = native.accession[:4]
71 if self.args.chain:
72 native = native.chains[self.args.chain]
73 else:
74 native = native.first_chain
75 except (structure.Broken3DStructureError, wwpdb.PDBParseError) as se:
76 raise ArgumentError("Can't parse native structure: " + str(se))
77
78 except (csb.core.ItemNotFoundError) as ce:
79 raise ArgumentError("Chain {0!s} not found in native structure".format(ce))
80
81 self.log('Superimposing fragments...\n')
82 si = LibrarySuperimposer(native, library, self.args.pdb, self.args.output,
83 save=self.args.save_structures, cutoff=self.args.rmsd)
84 matches = si.run(self.args.cpu)
85
86 info = si.plot(matches)
87 self.log(' RMSD Cutoff {0:>6.2f} A'.format(self.args.rmsd))
88 self.log(' AVG Precision {0.precision:>6.2f} %'.format(info))
89 self.log(' Coverage {0.coverage:>6.2f} %'.format(info))
90 self.log(' RW Precision {0.figure}'.format(si))
91
92
93
94 except ArgumentIOError as ioe:
95 PrecisionApp.exit(str(ioe), code=ExitCodes.INVALID_DATA)
96
97 except ArgumentError as ae:
98 PrecisionApp.exit(str(ae), code=ExitCodes.INVALID_DATA)
99
100 self.log('\nDone.')
101
104
107
109
110 - def __init__(self, precision, coverage):
113
115 return '{0.precision:6.2f} {0.coverage:6.2f}'.format(self)
116
119
121
122 - def __init__(self, native, library, pdb, output, save=False, cutoff=1.5):
145
147 try:
148 if self._out and not self._out.closed:
149 self._out.close()
150 except:
151 pass
152
153 @property
156
157 @property
160
161 - def run(self, cpu=1):
188
189 - def plot(self, matches):
190
191 residues = range(1, self._native.length + 1)
192 precision = []
193 precision2 = []
194 background = []
195 covered = set()
196
197 all = {}
198 positive = {}
199
200 for rank in residues:
201 all[rank] = 0
202 positive[rank] = 0
203
204 for match in matches:
205 for rank in range(match.qstart, match.qend + 1):
206 all[rank] += 1
207
208 if match.rmsd <= self._cutoff:
209 positive[rank] += 1
210 covered.add(rank)
211
212 assert sorted(all.keys()) == sorted(positive.keys()) == residues
213
214 for rank in residues:
215 if all[rank] == 0:
216 precision2.append(0)
217 background.append(0)
218 else:
219 p = positive[rank] * 100.0 / all[rank]
220 precision.append(p)
221 precision2.append(p)
222 background.append(100)
223
224 coverage = len(covered) * 100.0 / len(residues)
225 if len(precision) > 0:
226 avg_precision = sum(precision) / len(precision)
227 else:
228 avg_precision = 0
229
230 with csb.io.plots.Chart() as chart:
231
232 chart.plot.bar(residues, background, color='#FFB0B0', linewidth=None, edgecolor='#FFB0B0')
233 chart.plot.bar(residues, precision2, color='#50A6DA', linewidth=None, edgecolor='#50A6DA')
234
235 chart.plot.set_title(self._native.entry_id)
236 chart.plot.set_xlabel('Residue')
237 chart.plot.set_xlim(1, len(residues))
238 chart.plot.set_ylabel('Precision, %')
239 chart.plot.set_ylim(0, 100)
240
241 xaxis = chart.plot.axes.xaxis
242 xaxis.set_minor_locator(matplotlib.ticker.IndexLocator(1, 0))
243 xaxis.set_major_locator(matplotlib.ticker.IndexLocator(5, 0))
244
245 try:
246 chart.width = 15
247 chart.height = 5.5
248
249 chart.save(self._figure)
250
251 except IOError as io:
252 raise ArgumentIOError("Can't save figure: " + str(io))
253
254 return GlobalInfo(avg_precision, coverage)
255
256 -def rmsd(target, source, fragments, pdb, save=None):
257
258 matches = []
259
260 try:
261 src_file = wwpdb.find(source, [pdb])
262 if src_file is None:
263 raise IOError("Can't find structure {0} in {1}".format(source, pdb))
264
265 src_structure = wwpdb.RegularStructureParser(src_file).parse_structure()
266
267 except (IOError, structure.Broken3DStructureError, wwpdb.PDBParseError) as ex:
268 matches.append('Error parsing {0:5}: {1!s}'.format(source, ex))
269 return matches
270
271 for fn, fragment in enumerate(fragments):
272 try:
273 if fragment.chain not in ('_', '', None):
274 src_chain = src_structure.chains[fragment.chain]
275 else:
276 src_chain = src_structure.first_chain
277
278 try:
279 query = target.subregion(fragment.qstart, fragment.qend)
280 subject = src_chain.subregion(fragment.start, fragment.end, clone=True)
281
282 except IndexError:
283 matches.append('Fragment {1.source_id:>5} {0.start:>4}-{0.end:>4} is out of range'.format(fragment))
284 continue
285
286 si = query.align(subject)
287 match = csb.bio.fragments.FragmentMatch(fragment.id, qstart=fragment.qstart, qend=fragment.qend,
288 probability=None, rmsd=si.rmsd, tm_score=None, qlength=target.length)
289 matches.append(match)
290
291 if save:
292 dummy = structure.Structure(subject.entry_id)
293 dummy.chains.append(subject)
294 filename = '{0.qstart:}-{0.qend}-{0.source_id}.{1.entry_id}{2}.frag'.format(fragment, query, fn or '')
295 dummy.to_pdb(os.path.join(save, filename))
296
297 except (structure.Broken3DStructureError, IOError) as ex:
298 matches.append("Can't superimpose fragment {0}: {1!s}".format(fragment.id, ex))
299 continue
300
301 return matches
302
303
304
305 if __name__ == '__main__':
306
307 AppRunner().run()
308