1 """
2 APIs for working with protein structure fragments and libraries.
3
4 This package contains the nuts and bolts of HHfrag. Everything here revolves
5 around the L{Target} class, which describes a protein structure prediction
6 target. One typically assigns fragments (L{Assignment}s) to the target and then
7 builds a fragment library with L{RosettaFragsetFactory}.
8
9 @note: Internal or legacy objects are intentionally left undocumented.
10 This typically indicates experimental code.
11 """
12
13 import os
14 import numpy
15
16 import csb.io
17 import csb.core
18 import csb.bio.utils
19 import csb.bio.structure
20 import csb.bio.sequence
21
22 from csb.bio.structure import SecondaryStructure
32
38
# Lookup table holding one float for every integer key from 5 to 54.
# NOTE(review): presumably maps a fragment length (in residues) to the
# expected RMSD between random fragments of that length -- confirm against
# the code that consumes this table before relying on that reading.
RANDOM_RMSD = { 5: 1.8749005857255376, 6: 2.4314283686276261, 7: 2.9021135267789608, 8: 3.2477716200172715, 9: 3.5469606556031708, 10: 3.8295465524456329,
                11: 4.1343107114131783, 12: 4.3761697929053014, 13: 4.6707299668248394, 14: 4.9379016881069733, 15: 5.1809028645084911, 16: 5.4146957142595662,
                17: 5.7135948448156988, 18: 5.9597935432566782, 19: 6.1337340535741962, 20: 6.3962825155503271, 21: 6.6107937773415166, 22: 6.8099096274123401,
                23: 7.0435583846849639, 24: 7.2160956482560970, 25: 7.4547896324594962, 26: 7.6431870072434211, 27: 7.8727812194173836, 28: 8.0727393298443637,
                29: 8.2551450998965326, 30: 8.4413583511786587, 31: 8.5958719774122052, 32: 8.7730435506242408, 33: 8.9970648837941649, 34: 9.1566521405105163,
                35: 9.2828620878454728, 36: 9.4525824357923405, 37: 9.6322126445253300, 38: 9.7851684750961176, 39: 9.9891454649821476, 40: 10.124373939352028,
                41: 10.284348528344765, 42: 10.390457305096271, 43: 10.565792044674239, 44: 10.676532740033737, 45: 10.789537132283652, 46: 11.004475543757550,
                47: 11.064541647783571, 48: 11.231219875286985, 49: 11.319222637391441, 50: 11.485478165340824, 51: 11.607522494435521, 52: 11.700268836069840,
                53: 11.831245255954073, 54: 11.918975893263905 }
50 """
51 Base class, representing a match between a fragment and its target.
52 """
53
54 - def __init__(self, id, qstart, qend, probability, rmsd, tm_score, qlength):
63
64 @property
67
68 @property
71
72 @property
75
76 @property
79
80 @property
83
84 @property
87
88 @property
90 return self._probability
91
92 @property
95
96 @property
98 raise NotImplementedError()
99
100 @property
102 raise NotImplementedError()
103
104 @property
106 raise NotImplementedError()
107
109
def __init__(self, target, isites_prediction, hmm_prediction, combined_prediction):
    """
    Bundle a target together with three predictions made for it.

    @param target: the object the predictions belong to; stored as C{self.target}
    @param isites_prediction: stored as C{self.isites}
    @param hmm_prediction: stored as C{self.hmm}
    @param combined_prediction: stored as C{self.combined}
    """
    # Plain public attributes: the arguments are stored as given, without
    # validation or copying.
    self.target = target
    self.isites, self.hmm, self.combined = (
        isites_prediction, hmm_prediction, combined_prediction)
117
119
120 - def __init__(self, alignment, coordinates):
124
126 """
127 Fragment-based phi/psi angles predictor.
128
129 @param target: target protein, containing fragment assignments
130 @type target: L{Target}
131 @param threshold: RMSD distance threshold for L{FragmentCluster}-based filtering
132 @type threshold: float
133 @param extend: pick alternative, longer cluster reps, if possible
134 @type extend: bool
135 @param init: populate all L{FragmentCluster}s on instantiation. If False, this step
136 will be performed on demand (the first time C{predictor.compute()} is invoked)
137
138 @note: if C{init} is False, the first call to C{predictor.compute()} might take a long
139 time. Subsequent calls will be very fast.
140 """
141
def __init__(self, target, threshold=1.5, extend=False, init=False):
    """
    Validate C{target}, remember the filtering settings and optionally
    populate the predictor straight away.

    @param target: target protein, containing fragment assignments
    @type target: L{Target}
    @param threshold: stored as a float in C{self._threshold}
    @type threshold: float
    @param extend: stored as a bool in C{self._extend}
    @type extend: bool
    @param init: if true, C{self.init()} is called before returning
    @type init: bool

    @raise TypeError: if C{target} is not a L{Target}
    @raise ValueError: if C{target.matches} is empty
    """

    if not isinstance(target, Target):
        raise TypeError(target)

    has_fragments = target.matches.length != 0
    if not has_fragments:
        raise ValueError('This target has no fragment assignments')

    self._target = target
    self._threshold = float(threshold)
    self._extend = bool(extend)

    # Caches read by the prediction methods; they start out empty and the
    # flag records that init() has not run yet.
    self._clusters = {}
    self._reps = {}
    self._initialized = False

    if init:
        self.init()
159
160 @property
163
164 @property
166 return self._threshold
167
168 @property
171
192
208
216
239
272
274 """
275 Filter the current fragment map and create a new, completely flat,
276 non-overlapping map built from centroids, assigned iteratively by
277 decreasing confidence. Centroids with lower confidence which overlap
278 with previously assigned centroids will be trimmed to fill existing
279 gaps only.
280
281 @return: L{TorsionPredictionInfo} instances, one for each target residue
282 @rtype: tuple of L{TorsionPredictionInfo}
283 """
284
285 if not self._initialized:
286 self.init()
287
288 prediction = []
289 slots = set(range(1, self.target.length + 1))
290
291 reps = list(self._reps.values())
292 reps.sort(key=lambda i: i.confidence, reverse=True)
293
294 for rep in reps:
295
296 for rank in range(rep.centroid.qstart, rep.centroid.qend + 1):
297 if rank in slots:
298 torsion = rep.centroid.torsion_at(rank, rank)[0]
299 ss = rep.centroid.sec_structure_at(rank, rank)[0]
300 info = TorsionPredictionInfo(rank, rep.confidence, torsion, ss, primary=True)
301
302 prediction.append(info)
303 slots.remove(rank)
304
305 for rank in slots:
306 prediction.append(TorsionPredictionInfo(rank, 0, None))
307
308 prediction.sort(key=lambda i: i.rank)
309 return tuple(prediction)
310
312 """
313 Extract all torsion angles coming from all fragments, which had survived
314 the filtering and cover residue C{#rank}.
315
316 @param rank: target residue rank
317 @type rank: int
318
319 @return: all L{TorsionAngles} for a cluster at the specified residue
320 @rtype: tuple of L{TorsionAngles}
321 """
322
323 if not self._initialized:
324 self.init()
325 if rank not in self._clusters:
326 return tuple()
327
328 angles = []
329
330 for node in self._clusters[rank]:
331 fragment = node.fragment
332 torsion = fragment.torsion_at(rank, rank)[0]
333 angles.append(torsion)
334
335 return tuple(angles)
336
339 """
340 Struct container for a single torsion angle prediction.
341
342 @param rank: target residue rank
343 @type rank: int
344 @param confidence: confidence of prediction
345 @type confidence: float
346 @param torsion: assigned phi/psi/omega angles
347 @type torsion: L{TorsionAngles}
348 @param dssp: assigned secondary structure
349 @type dssp: L{SecondaryStructureElement}
350 @param primary: if True, designates that the assigned angles are extracted
351 from the L{ClusterRep} at residue C{#rank}; otherwise: the
352 angles are coming from another, overlapping L{ClusterRep}
353
354 """
355
356 - def __init__(self, rank, confidence, torsion, dssp, primary=False):
363
370
372 return '<TorsionPredictionInfo: {0.confidence:6.3f} at #{0.rank}>'.format(self)
373
376
388
399
400 -class Target(csb.core.AbstractNIContainer):
401 """
402 Represents a protein structure prediction target.
403
404 @param id: target sequence ID, in PDB accnC format
405 @type id: str
406 @param length: total target sequence length
407 @type length: int
408 @param residues: a list, containing target's residues. See also
409 L{Target.from_sequence}
410 @type residues: iterable of L{csb.bio.structure.ProteinResidue}s
411 """
412
432
433 @staticmethod
454
455 @staticmethod
468
469 @staticmethod
474
475 @property
477 return self._residues
478
479 @property
482
483 @property
486
487 @property
489 return self._accession
490
491 @property
493 return self._chain_id
494
495 @property
498
499 @property
502
503 @property
506
507 @property
509 return self._assignments
510
511 @property
513 return self._residues
514
515 @property
517 return self._segments
518
540
542 """
543 Assign a bunch of fragments at once.
544 @type fragments: iterable of L{Assignment}s
545 """
546 for frag in fragments:
547 self.assign(frag)
548
def filter(self, threshold=1.5, extend=False):
    """
    Produce a copy of this target which carries only the fragments chosen
    as cluster representatives at each residue.

    Every residue of C{self} is filtered on its own; whenever a residue
    yields a representative, that representative's centroid is assigned to
    the copy obtained from C{self.clone()}.

    @param threshold: cluster RMSD threshold (see L{FragmentCluster})
    @type threshold: float
    @param extend: pick extended alternatives where possible (default=False)
    @type extend: bool

    @return: a new target, containing only cluster centroids/reps
    @rtype: L{Target}
    """

    filtered = self.clone()

    # Lazy on purpose: each residue is filtered right before its
    # representative is assigned.
    representatives = (res.filter(threshold=threshold, extend=extend)
                       for res in self.residues)

    for rep in representatives:
        if rep is None:
            # nothing survived the filtering at this residue
            continue
        filtered.assign(rep.centroid)

    return filtered
571
585
587
588 - def __init__(self, id, length, residues, overlap=None):
592
603
607
609 """
610 Wrapper around L{Target}'s native residues. Decorates them with additional,
611 fragment-related methods.
612
613 @type native_residue: L{csb.bio.structure.ProteinResidue}
614 """
615
621
622 @property
625
626 @property
629
630 @property
632 return self._assignments
633
def assign(self, assignment_info):
    """
    Register C{assignment_info} with this residue by appending it to the
    residue's private assignment collection.

    @param assignment_info: the item to append
    """
    collection = self._assignments
    collection._append_item(assignment_info)
636
655
657 """
658 Filter all fragments, covering this position in the L{Target} using a
659 L{FragmentCluster}.
660
661 @param method: one of the L{Metrics} members (default=L{Metrics.RMSD})
662 @type method: str
663 @param threshold: cluster RMSD threshold (see L{FragmentCluster})
664 @type threshold: float
665 @param extend: pick extended alternative where possible (default=False)
666 @type extend: bool
667
668 @return: cluster's representative (if converged) or None
669 @rtype: L{ClusterRep} or None
670 """
671
672 try:
673 nodes = []
674 for ai in self.assignments:
675 node = ClusterNode.create(ai.fragment, method, extend)
676 nodes.append(node)
677
678 cluster = FragmentCluster(nodes, threshold=threshold)
679
680 center = cluster.shrink(minitems=0)
681 if center.has_alternative:
682 center.exchange()
683
684 return center
685
686 except (ClusterExhaustedError, ClusterDivergingError):
687 return None
688
701
703 """
704 @return: the residue-wise precision of the fragment library at the
705 current position (percentage).
706
707 @param threshold: true-positive RMSD cutoff (default=1.5)
708 @type threshold: float
709 @rtype: float
710 """
711
712 if self.assignments.length < 1:
713 return None
714 else:
715 positive = [a for a in self.assignments if a.fragment.rmsd <= threshold]
716 pos = len(positive) * 100.0 / self.assignments.length
717
718 return pos
719
738
875
906
908
909 binsize = float(binsize)
910 bins = numpy.ceil(numpy.array(data) / binsize)
911
912 hist = dict.fromkeys(bins, 0)
913 for bin in bins:
914 hist[bin] += (1.0 / len(bins))
915
916 freq = numpy.array(hist.values())
917 return - numpy.sum(freq * numpy.log(freq))
918
920
921 rmsds = self.pairwise_rmsd()
922 return self._entropy(rmsds, binsize)
923
928
930
931 rmsds = self.pairwise_rmsd()
932
933 if len(rmsds) < 1:
934 return None
935
936 return sum([1 for i in rmsds if i <= threshold]) / float(len(rmsds))
937
939
940 sa_rmsds = self.pairwise_sa_rmsd(profiles=profiles)
941
942 if len(sa_rmsds) < 1:
943 return None
944
945 return sum([1 for i in sa_rmsds if i <= threshold]) / float(len(sa_rmsds))
946
953
955
956 cons = self.rmsd_consistency()
957
958 if cons is None:
959 return 0
960 else:
961 return numpy.log10(self.count) * cons
962
964
973
974 @property
976 return self._assignment.backbone[self._relrank]
977
978 @property
980 return self._assignment
981
983 """
984 Represents a match between a fragment and its target.
985
986 @param source: source structure (must have torsion angles precomputed)
987 @type source: L{csb.bio.structure.Chain}
988 @param start: start position in C{source} (rank)
989 @type start: int
990 @param end: end position in C{source} (rank)
991 @type end: int
992 @param id: fragment ID
993 @type id: str
994 @param qstart: start position in target (rank)
995 @type qstart: int
996 @param qend: end position in target (rank)
997 @type qend: int
998 @param probability: probability of assignment
999 @type probability: float
1000 @param rmsd: RMSD of the fragment, compared to target's native structure
1001 @type rmsd: float
1002 """
1003
def __init__(self, source, start, end, qstart, qend, id=None, probability=None, rmsd=None,
             tm_score=None, score=None, neff=None, segment=None, internal_id=None):
    """
    Cut the C{start}..C{end} region out of C{source} and keep private copies
    of its CA coordinates, torsion angles and sequence.

    If no C{id} is given, one is generated as
    C{"<source id>:<start>-<end>"}.

    @raise csb.bio.structure.Broken3DStructureError: if looking up the CA
           atom of a residue in the region fails with
           C{csb.core.ItemNotFoundError}
    """

    # The torsion angles are copied below, so they must be present already.
    assert source.has_torsion
    sub = source.subregion(start, end, clone=True)
    try:
        calpha = [r.atoms['CA'].vector.copy() for r in sub.residues]
    except csb.core.ItemNotFoundError:
        raise csb.bio.structure.Broken3DStructureError()
    torsion = [r.torsion.copy() for r in sub.residues]

    self._calpha = csb.core.ReadOnlyCollectionContainer(items=calpha, type=numpy.ndarray)
    self._torsion = torsion
    self._sequence = sub.sequence

    # First four characters of the accession, followed by the chain's id.
    self._source_id = source.accession[:4] + source.id
    self._start = start
    self._end = end

    self._score = score
    self._neff = neff
    # Placeholder; replaced at the end of this initializer.
    self._ss = None

    self._segment_start = segment
    self.internal_id = internal_id

    if id is None:
        id = "{0}:{1}-{2}".format(self.source_id, self.start, self.end)

    # None is passed as the trailing positional argument of the base
    # initializer.
    # NOTE(review): presumably that is the base class' qlength -- confirm.
    super(Assignment, self).__init__(id, qstart, qend, probability, rmsd, tm_score, None)

    # Default secondary structure: one '-' per position of the fragment.
    self._ss = SecondaryStructure('-' * self.length)
1036
1037 @staticmethod
1060
1061 @property
1064
1065 @property
1067 return self._sequence
1068
1069 @property
1071 return self._torsion
1072
1073 @property
1075 return self._source_id
1076
1077 @property
1080
1081 @property
1084
1085 @property
1088
1089 @property
1092
1093 @property
1095 return self._segment_start
1096
1097 @property
1100 @secondary_structure.setter
1109
1119
1124
1126 """
1127 @return: True if the fragment is centered around position=C{rank}.
1128 @rtype: bool
1129 """
1130
1131 if self.qstart < rank < self.qend:
1132 if (rank - self.qstart + 1) > 0.4 * (self.qend - self.qstart + 1):
1133 return True
1134
1135 return False
1136
1138 """
1139 @return: the CA coordinates of the fragment at the specified subregion.
1140 @rtype: list
1141 """
1142
1143 self._check_range(qstart, qend)
1144
1145 relstart = qstart - self.qstart
1146 relend = qend - self.qstart + 1
1147
1148 return self.backbone[relstart : relend]
1149
1151 """
1152 @return: the torsion angles of the fragment at the specified subregion.
1153 @rtype: list
1154 """
1155
1156 self._check_range(qstart, qend)
1157
1158 relstart = qstart - self.qstart
1159 relend = qend - self.qstart + 1
1160
1161 return self.torsion[relstart : relend]
1162
1164
1165 self._check_range(qstart, qend)
1166
1167 relstart = qstart - self.qstart
1168 relend = qend - self.qstart + 1
1169
1170 return sa_string[relstart : relend]
1171
1179
1191
1192 - def chain_at(self, source, qstart, qend):
1200
1202 """
1203 @type other: L{Assignment}
1204 @return: target positions, covered by both C{self} and C{other}
1205 @rtype: set of int
1206 """
1207
1208 qranks = set(range(self.qstart, self.qend + 1))
1209 sranks = set(range(other.qstart, other.qend + 1))
1210
1211 return qranks.intersection(sranks)
1212
def rmsd_to(self, other, min_overlap=5):
    """
    Compute the CA RMSD between C{self} and C{other} over the target
    positions both fragments cover.

    @param other: another fragment
    @type other: L{Assignment}
    @param min_overlap: require at least that number of overlapping residues
                        (return None if not satisfied)
    @type min_overlap: int

    @return: the RMSD, or None if the overlap is too short or either
             fragment yields no coordinates for the shared region
    @rtype: float
    """

    shared = self.overlap(other)
    if len(shared) < min_overlap:
        return None

    # The shared region is addressed by its first and last target rank.
    first, last = min(shared), max(shared)

    mine = self.backbone_at(first, last)
    theirs = other.backbone_at(first, last)

    if len(mine) == 0 or len(theirs) == 0:
        return None

    return csb.bio.utils.rmsd(numpy.array(mine), numpy.array(theirs))
1239
1240 - def nrmsd_to(self, other, min_overlap=5):
1255
def mda_to(self, other, min_overlap=5):
    """
    Compute the largest absolute phi/psi difference between C{self} and
    C{other} over the target positions both fragments cover.

    Phi is compared at every shared position except the first, psi at every
    shared position except the last.

    @param other: another fragment
    @type other: L{Assignment}
    @param min_overlap: require at least that number of overlapping residues
                        (return None if not satisfied)
    @type min_overlap: int

    @return: the maximum deviation, or None if the overlap is too short or
             fewer than two aligned positions are available
    @rtype: float
    """

    common = self.overlap(other)
    if len(common) < min_overlap:
        return None

    qstart, qend = min(common), max(common)

    q = self.torsion_at(qstart, qend)
    s = other.torsion_at(qstart, qend)

    # zip() returns a one-shot iterator on Python 3, which cannot be sliced,
    # so the pairs are materialized first.
    pairs = list(zip(q, s))

    # With fewer than two pairs both slices below would be empty and max()
    # would raise, so there is nothing to compare.
    if len(pairs) < 2:
        return None

    # NOTE(review): the first phi and the last psi are left out, presumably
    # because they are undefined at the ends of a fragment -- confirm.
    maxphi = max(numpy.abs(i.phi - j.phi) for i, j in pairs[1:])
    maxpsi = max(numpy.abs(i.psi - j.psi) for i, j in pairs[:-1])

    return max(maxphi, maxpsi)
1275
def to_rosetta(self, source, qstart=None, qend=None, weight=None):
    """
    Render the fragment (or a subregion of it) as text, one line per residue.

    @deprecated: this method will be deleted soon. Use
    L{csb.bio.fragments.rosetta.OutputBuilder} instead.

    @param source: source structure; its torsion angles are (re)computed
    @param qstart: first target rank to write (default: C{self.qstart})
    @param qend: last target rank to write (default: C{self.qend})
    @param weight: value for the last column (default: C{self.probability})

    @return: the formatted lines
    @rtype: str
    """
    if weight is None:
        weight = self.probability
    # Any falsy value (None, 0) selects the fragment's own boundaries.
    qstart = qstart or self.qstart
    qend = qend or self.qend

    source.compute_torsion()
    chain = self.chain_at(source, qstart, qend)

    template = ' {0:4} {1:1} {2:>5} {3!s:1} {4!s:1} {5:>8.3f} {6:>8.3f} {7:>8.3f} {8:>8.3f}\n'
    output = csb.io.MemoryStream()

    for offset, residue in enumerate(chain.residues):

        acc = self.source_id[:4]
        ch = self.source_id[4].upper()

        # rank in the source structure which corresponds to this residue
        start = qstart - self.qstart + self.start + offset
        aa = residue.type
        ss = 'L'

        # Missing (or zero) angles are written as 0.
        angles = residue.torsion
        phi = angles.phi or 0
        psi = angles.psi or 0
        omega = angles.omega or 0

        output.write(template.format(acc, ch, start, aa, ss, phi, psi, omega, weight))

    return output.getvalue()
1312
1314
1315 BIT_SCORE_THRESHOLD = 1.1
1316
def __init__(self, source, start, end, qstart, qend, window, score, rmsd):
    """
    Create an assignment with a fixed probability of 1.0 and remember the
    C{window} it was created with.

    @param window: stored as C{self._window}
    @param score: forwarded to the base initializer
    @param rmsd: forwarded to the base initializer
    """

    self._window = window

    # Everything the caller cannot set is pinned here; the remaining
    # arguments are passed straight through to the base initializer.
    fixed = dict(id=None, probability=1.0, tm_score=None, neff=None,
                 segment=None, internal_id=None)

    super(ChemShiftAssignment, self).__init__(
        source, start, end, qstart, qend, rmsd=rmsd, score=score, **fixed)
1324
1325 @property
1328
1331
1334
1337
1339 """
1340 Provides clustering/filtering of the fragments, covering a common residue
1341 in the target. Clustering is done via iterative shrinking of the cluster.
1342 At each iteration, node rejection (deletion) is attempted for each node. The
1343 node rejection, causing the most significant drop in the average pairwise
1344 distance (RMSD) in the cluster, is retained. This procedure is repeated
1345 until: 1) the average pairwise RMSD drops below the C{threshold} (converged),
1346 2) the cluster gets exhausted or 3) node rejection no longer
1347 causes a drop in the average distance (not converging).
1348
1349 @param items: cluster members
1350 @type items: iterable of L{ClusterNode}s
1351 @param threshold: RMSD threshold; continue shrinking until the mean distance
1352 drops below this value (default=1.5)
1353 @type threshold: float
1354 @param connectedness: use only nodes which are connected to at least c% of all
1355 initial nodes (default=0.5, that means 50%)
1356 @type connectedness: float
1357 """
1358
1359 MIN_LENGTH = 6
1360
1361 - def __init__(self, items, threshold=1.5, connectedness=0.5):
1395
1396 @property
1398 return len(self._items)
1399
1400 @property
1402 return tuple(self._items)
1403
1404 @property
1406 return tuple(i.fragment for i in self._items)
1407
1408 @property
1410 return self._threshold
1411 @threshold.setter
1413 self._threshold = float(value)
1414
1415 @property
1417 return self._connectedness
1418
1420
1421 d = []
1422
1423 for i in self._matrix:
1424 if skip is i:
1425 continue
1426
1427 for j in self._matrix[i]:
1428 if skip is not j:
1429 d.append(self._matrix[i][j])
1430
1431 return d
1432
1434
1435 if j in self._matrix[i]:
1436 return self._matrix[i][j]
1437 else:
1438 return None
1439
def mean(self, skip=None):
    """
    Compute the mean distance in the cluster.

    @param skip: optional node; if given, the mean is computed with that
                 node's weight and edges subtracted, leaving the cluster
                 itself unchanged
    @return: the current mean distance in the cluster (0 if skipping the
             node leaves no edges)
    @rtype: float
    @raise ClusterExhaustedError: if the cluster has no edges
    """
    if self._edges == 0:
        raise ClusterExhaustedError()

    if skip:
        # Both totals count the skipped node's contribution twice.
        weight = self._weight - 2 * skip.weight
        edges = self._edges - 2 * len(self._matrix[skip])
        return float(weight) / edges if edges >= 1 else 0

    return float(self._weight) / self._edges
1459
1461 """
1462 @return: the current representative fragment
1463 @rtype: L{ClusterRep}
1464
1465 @note: the cluster rep is the node with the lowest average distance
1466 to all other nodes. If a fixed fragment exists, structurally similar
1467 to the rep, but longer, this fragment may be suggested as an alternative
1468 (see also L{ClusterRep}).
1469 """
1470
1471 alt = None
1472 cen = None
1473 avg = None
1474
1475 for i in self._matrix:
1476
1477 curravg = float(i.weight) / len(self._matrix[i])
1478 conn = len(self._matrix[i]) / float(self.count)
1479
1480 if avg is None or (curravg < avg and conn >= self.connectedness):
1481 avg = curravg
1482 cen = i
1483 elif curravg == avg:
1484 if i.fragment.length > cen.fragment.length:
1485 cen = i
1486
1487 d = self._distances()
1488 mean = numpy.mean(d)
1489 cons = sum(1.0 for i in d if i <= self.threshold) / len(d)
1490
1491 for i in self._matrix:
1492 if i is not cen and i.fixed and i.fragment.length > cen.fragment.length:
1493 distance = self._distance(i, cen)
1494 if distance is not None and distance < 0.5 * self.threshold:
1495 if alt is None or alt.fragment.length < i.fragment.length:
1496 alt = i
1497
1498 return ClusterRep(cen, mean, cons, len(self._matrix[cen]), alternative=alt,
1499 rejections=(self._initcount - self.count))
1500
1502 """
1503 Remove C{item} from the cluster.
1504
1505 @type item: L{ClusterNode}
1506 @raise ClusterExhaustedError: if this is the last remaining item
1507 """
1508 if self.count == 1:
1509 raise ClusterExhaustedError()
1510
1511 assert not item.fixed
1512
1513 for i in self._matrix:
1514 if item in self._matrix[i]:
1515 distance = self._matrix[i][item]
1516 self._weight -= 2 * distance
1517 i.weight -= distance
1518
1519 del self._matrix[i][item]
1520 self._edges -= 1
1521
1522 self._edges -= len(self._matrix[item])
1523 del self._matrix[item]
1524 self._items.remove(item)
1525
1527 """
1528 Shrink the cluster by a single node.
1529
1530 @return: True on successful shrink, False otherwise (e.g. if
1531 already converged)
1532 @rtype: bool
1533 @raise ClusterExhaustedError: if exhausted
1534 @raise ClusterDivergingError: if not converging
1535 """
1536
1537 mean = self.mean()
1538 if mean <= self.threshold or self.count == 1:
1539 return False
1540
1541 m = {}
1542
1543 for i in self._matrix:
1544 if not i.fixed:
1545 newmean = self.mean(skip=i)
1546 m[newmean] = i
1547
1548 if len(m) == 0:
1549 raise ClusterExhaustedError()
1550
1551 newmean = min(m)
1552
1553 if newmean > mean:
1554 raise ClusterDivergingError()
1555 elif newmean < mean:
1556 junk = m[newmean]
1557 self.reject(junk)
1558 return True
1559 else:
1560 return False
1561
def shrink(self, minitems=2):
    """
    Shrink the cluster repeatedly, one node at a time, until a shrinking
    step reports that nothing more was removed.

    @param minitems: absolute minimum of the number of nodes in the cluster
    @type minitems: int

    @return: cluster's representative, as returned by C{self.centroid()}
    @rtype: L{ClusterRep}

    @raise ClusterExhaustedError: if the cluster holds no more than
                                  C{minitems} nodes, either initially or
                                  after a successful shrinking step
    """

    # A cluster which is too small already cannot be shrunk at all.
    if self.count <= minitems:
        raise ClusterExhaustedError()

    while True:
        if not self.shrinkone():
            break
        if self.count <= minitems:
            raise ClusterExhaustedError()

    return self.centroid()
1586
1588 """
1589 Cluster node.
1590
1591 @param fragment: fragment
1592 @type fragment: L{Assignment}
1593 @param distance: distance metric (a L{Metrics} member, default is RMSD)
1594 @type distance: str
1595 @param fixed: mark this node as fixed (cannot be rejected)
1596 @type fixed: bool
1597 """
1598
1599 FIXED = 0.7
1600
1601 @staticmethod
1613
1624
1626 """
1627 @return: the distance between self and another node
1628 @type other: L{ClusterNode}
1629 @rtype: float
1630 """
1631 return self._distance(other.fragment)
1632
1634 """
1635 Cluster's representative (centroid) node. This object carries the
1636 result of shrinking itself.
1637
1638 @param centroid: rep node
1639 @type centroid: L{ClusterNode}
1640 @param mean: current mean distance in the cluster
1641 @type mean: float
1642 @param consistency: percentage of pairwise distances below the RMSD C{threshold}
1643 @type consistency: float
1644 @param count: current number of nodes in the cluster
1645 @type count: int
1646 @param rejections: total number of rejections
1647 @type rejections: int
1648 @param alternative: suggested cluster rep alternative (e.g. structurally
1649 similar to the centroid, but longer)
1650 @type alternative: L{ClusterNode}
1651 """
1652
1653 - def __init__(self, centroid, mean, consistency, count, rejections=0, alternative=None):
1666
1667 @property
1669 """
1670 Confidence of assignment: log10(count) * consistency
1671 """
1672 if self.count <= 0 or self.count is None or self.consistency is None:
1673 return 0
1674 else:
1675 return numpy.log10(self.count) * self.consistency
1676
1677 @property
1679 return self._centroid
1680
1681 @property
1683 return self._alternative
1684
1685 @property
1687 return self._alternative is not None
1688
1689 @property
1692
1693 @property
1695 return self._consistency
1696
1697 @property
1700
1701 @property
1703 return self._rejections
1704
1706 """
1707 If an alternative is available, swap the centroid and the alternative.
1708 """
1709
1710 if self._alternative is not None:
1711
1712 centroid = self._centroid
1713 self._centroid = self._alternative
1714 self._alternative = centroid
1715
1721
1723
1724 @staticmethod
1738
1739 - def __init__(self, center, qstart, qend):
1749
1750 @property
1753
1754 @property
1757
1758 @property
1761
1762 @property
1765
1766 @property
1769
1770 @property
1773
1774 - def chain(self, source):
1776
1779
1835
1838
1839 - def __init__(self, residue, confidence=0, count=0, confident=True, gap=False, rep=None):
1847
1848 @property
1851
1852 @property
1855
1856 @property
1858 if self.rep:
1859 return self.rep.torsion_at(self.rank, self.rank)[0]
1860 else:
1861 return None
1862
1865 """
1866 Simplifies the construction of fragment libraries.
1867 """
1868
1872
1889
1891 """
1892 Build a fixed-length fragment library from a list of
1893 variable-length L{Assignment}s.
1894
1895 @param fragments: source fragments
1896 @type fragments: iterable of L{RosettaFragment}s
1897 @param window: fixed-length fragment size (for classic Rosetta: choose 9)
1898 @type window: int
1899
1900 @return: fixed-length fragment library
1901 @rtype: L{RosettaFragmentMap}
1902 """
1903
1904 frags = []
1905
1906 for f in fragments:
1907 for qs in range(f.qstart, f.qend - window + 1):
1908 frags.append(f.subregion(qs, qs + window - 1))
1909
1910 return self.rosetta.RosettaFragmentMap(frags)
1911
def make_combined(self, target, filling, threshold=0.5, callback=None):
    """
    Complement C{target}'s assignments with C{filling} (e.g. rosetta fragments).
    Each residue is filtered; residues whose cluster confidence reaches
    C{threshold} count as covered, all other residues are marked as
    unconfident in the resulting library before C{filling} is added.

    @param target: target protein
    @type target: L{Target}
    @param filling: additional fragments to place in the low-conf regions
    @type filling: L{RosettaFragmentMap} or iterable of L{RosettaFragment}
    @param threshold: confidence threshold
    @type threshold: float
    @param callback: optional callable, invoked once per residue with a
                     L{ResidueEventInfo} describing the outcome

    @return: complemented fragment library
    @rtype: L{RosettaFragmentMap}
    """

    fragmap = self.make_fragset(target)
    confident_ranks = set()

    for residue in target.residues:

        # no fragments at all: report a gap
        if residue.assignments.length == 0:
            if callback:
                callback(ResidueEventInfo(residue.native, gap=True))
            continue

        # clustering produced no representative
        rep = residue.filter()
        if rep is None:
            if callback:
                callback(ResidueEventInfo(residue.native, 0, 0, confident=False))
            continue

        confident = bool(rep.confidence >= threshold)
        if confident:
            confident_ranks.add(residue.native.rank)

        if callback:
            callback(ResidueEventInfo(residue.native, rep.confidence, rep.count, confident))

    for rank in (residue.native.rank for residue in target.residues):
        if rank not in confident_ranks:
            fragmap.mark_unconfident(rank)

    for frag in filling:
        fragmap.complement(frag)

    return fragmap
1963
1965 """
1966 Build a filtered fragment library (by clustering), containing only
1967 representative fragments (cluster centroids).
1968
1969 @param target: target protein
1970 @type target: L{Target}
1971 @param extend: if True, pick alternative reps if available
1972 @type extend: bool
1973
1974 @return: filtered fragment library
1975 @rtype: L{RosettaFragmentMap}
1976 """
1977
1978 fragments = []
1979
1980 for r in target.residues:
1981 if r.assignments.length == 0:
1982 if callback:
1983 callback(ResidueEventInfo(r.native, gap=True))
1984 continue
1985
1986 cluster = r.filter(extend=extend)
1987 if cluster is None:
1988 if callback:
1989 callback(ResidueEventInfo(r.native, 0, 0, confident=False))
1990
1991 if extend and cluster.has_alternative:
1992 best = cluster.alternative
1993 else:
1994 best = cluster.centroid
1995
1996 fragment = self.rosetta.RosettaFragment.from_object(best)
1997 fragments.append(fragment)
1998 if callback:
1999 callback(ResidueEventInfo(r.native, cluster.confidence, cluster.count, rep=cluster.centroid))
2000
2001 fragments.sort()
2002 return self.rosetta.RosettaFragmentMap(fragments, target.length)
2003
2004 - def mix(self, *fragsets):
2024
2027
2029
2030 FACTORY = None
2031 DSN = None
2032
def __init__(self, factory=None, dsn=None):
    """
    Remember how to open a connection; nothing is opened here.

    @param factory: callable used to create the connection; falls back to
                    the class attribute C{FACTORY} if not given
    @param dsn: argument handed to C{factory}; falls back to the class
                attribute C{DSN} if not given
    """

    defaults = self.__class__

    self.factory = factory or defaults.FACTORY
    self.cs = dsn or defaults.DSN

    # both handles stay unset until a connection is actually opened
    self.cursor = None
    self.connection = None
2039
2041
2042 self.connection = self.factory(self.cs)
2043 try:
2044 self.cursor = self.connection.cursor()
2045 except:
2046 self.connection.close()
2047 raise
2048 return self
2049
2051 try:
2052 if not self.cursor.closed:
2053 self.cursor.close()
2054 finally:
2055 if not self.connection.closed:
2056 self.connection.close()
2057
2078
2079 @staticmethod
2080 - def connection_string(database='FragmentBenchmarks', host='', username='', password=''):
2091
2098
2105
2112
2119
2120 - def scores(self, benchmark_id, type):
2126
2133
2142
def structure(self, accession, chain=None):
    """
    Locate the structure file for C{accession} and parse it.

    The file is looked up by C{accession} first; if that fails and a
    C{chain} is given, a second lookup is made using C{accession + chain}.

    @param accession: accession to look for
    @type accession: str
    @param chain: optional chain ID, used for the fallback lookup only
    @type chain: str

    @return: the result of parsing the file found
    @raise IOError: if neither lookup finds a file
    """

    location = self._find(accession, self._pdb)

    if not location and chain:
        location = self._find(accession + chain, self._pdb)

    if location:
        return self._parser(location).parse_structure()

    raise IOError('{0} not found here: {1}'.format(accession, self._pdb))
2154
def prediction(self, target_id, type, ss=False):
    """
    Rebuild a target from its stored record and assign to it all stored
    fragments of the given C{type}.

    Source structures are loaded on demand and reused as long as
    consecutive fragments refer to the same accession. If a source
    structure cannot be loaded, the error is appended to C{target.errors}
    and that fragment is skipped.

    @param target_id: ID of the target to look up
    @param type: fragment type, passed on to C{self.assignments}
    @param ss: if true, C{self._attach_sec_structure} is called on the
               target before it is returned
    @type ss: bool

    @return: the target, with fragments assigned
    @raise ValueError: if no details are found for C{target_id}
    """

    details = self.target_details(target_id)
    if not details:
        raise ValueError('No such Target ID in the database: {0}'.format(target_id))
    record = details[0]

    accession = record["Accession"]
    length = float(record["Length"])
    # "or 1." guards the division against a zero length
    overlap = float(record["MaxOverlap"]) / (length or 1.)

    # the accession is a 4-character code followed by the chain ID
    pdb_code, chain_id = accession[:4], accession[4]

    native = self.structure(pdb_code, chain_id).chains[chain_id]
    segments = self.target_segments(target_id)
    target = self._factory.target(accession, length, native.residues, overlap, segments)

    # assignment keyword -> column it is read from
    columns = (
        ('start', 'SourceStart'),
        ('end', 'SourceEnd'),
        ('id', 'FragmentName'),
        ('qstart', 'Start'),
        ('qend', 'End'),
        ('probability', 'Probability'),
        ('score', 'Score'),
        ('neff', 'Neff'),
        ('rmsd', 'RMSD'),
        ('tm_score', 'TMScore'),
        ('segment', 'SegmentStart'),
        ('internal_id', 'InternalID'),
    )

    source = None

    for entry in self.assignments(target_id, type):

        src_accession = entry['Source'][:4]
        src_chain = entry['Source'][4]

        # load a structure only when the accession changes
        if source is None or source.accession != src_accession:
            try:
                source = self.structure(src_accession, src_chain)
            except (IOError, ValueError) as ex:
                target.errors.append(ex)
                continue

        # '_' stands for "no chain ID": take the first chain
        frag_chain = source.first_chain if src_chain == '_' else source.chains[src_chain]
        if not frag_chain.has_torsion:
            frag_chain.compute_torsion()

        values = dict((keyword, entry[column]) for keyword, column in columns)
        target.assign(self._factory.assignment(source=frag_chain, **values))

    if ss:
        self._attach_sec_structure(target, target_id, type)

    return target
2212
2228