1 """
2 APIs for working with protein structure fragments and libraries.
3
4 This package contains the nuts and bolts of HHfrag. Everything here revolves
5 around the L{Target} class, which describes a protein structure prediction
6 target. One typically assigns fragments (L{Assignment}s) to the target and then
7 builds a fragment library with L{RosettaFragsetFactory}.
8
9 @note: Internal or legacy objects are intentionally left undocumented.
10 This typically indicates experimental code.
11 """
12
13 import os
14 import numpy
15
16 import csb.io
17 import csb.core
18 import csb.bio.utils
19 import csb.bio.structure
20 import csb.bio.sequence
21
22 from csb.bio.structure import SecondaryStructure
32
38
# Lookup table with integer keys 5..54 mapping to float values.
# NOTE(review): presumably the expected RMSD between random fragment pairs,
# keyed by fragment length -- confirm against the code that consumes it.
39 RANDOM_RMSD = { 5: 1.8749005857255376, 6: 2.4314283686276261, 7: 2.9021135267789608, 8: 3.2477716200172715, 9: 3.5469606556031708, 10: 3.8295465524456329,
40 11: 4.1343107114131783, 12: 4.3761697929053014, 13: 4.6707299668248394, 14: 4.9379016881069733, 15: 5.1809028645084911, 16: 5.4146957142595662,
41 17: 5.7135948448156988, 18: 5.9597935432566782, 19: 6.1337340535741962, 20: 6.3962825155503271, 21: 6.6107937773415166, 22: 6.8099096274123401,
42 23: 7.0435583846849639, 24: 7.2160956482560970, 25: 7.4547896324594962, 26: 7.6431870072434211, 27: 7.8727812194173836, 28: 8.0727393298443637,
43 29: 8.2551450998965326, 30: 8.4413583511786587, 31: 8.5958719774122052, 32: 8.7730435506242408, 33: 8.9970648837941649, 34: 9.1566521405105163,
44 35: 9.2828620878454728, 36: 9.4525824357923405, 37: 9.6322126445253300, 38: 9.7851684750961176, 39: 9.9891454649821476, 40: 10.124373939352028,
45 41: 10.284348528344765, 42: 10.390457305096271, 43: 10.565792044674239, 44: 10.676532740033737, 45: 10.789537132283652, 46: 11.004475543757550,
46 47: 11.064541647783571, 48: 11.231219875286985, 49: 11.319222637391441, 50: 11.485478165340824, 51: 11.607522494435521, 52: 11.700268836069840,
47 53: 11.831245255954073, 54: 11.918975893263905 }
50 """
51 Base class, representing a match between a fragment and its target.
52 """
53
54 - def __init__(self, id, qstart, qend, probability, rmsd, tm_score, qlength):
63
64 @property
67
68 @property
71
72 @property
75
76 @property
79
80 @property
83
84 @property
87
88 @property
90 return self._probability
91
92 @property
95
96 @property
98 raise NotImplementedError()
99
100 @property
102 raise NotImplementedError()
103
104 @property
106 raise NotImplementedError()
107
109
def __init__(self, target, isites_prediction, hmm_prediction, combined_prediction):
    """
    Bundle a target together with three alternative predictions for it.

    @param target: the object the predictions refer to (stored as C{self.target})
    @param isites_prediction: stored unchanged as C{self.isites}
    @param hmm_prediction: stored unchanged as C{self.hmm}
    @param combined_prediction: stored unchanged as C{self.combined}
    """
    self.target = target
    self.isites, self.hmm, self.combined = (
        isites_prediction, hmm_prediction, combined_prediction)
117
119
120 - def __init__(self, alignment, coordinates):
124
126 """
127 Fragment-based phi/psi angles predictor.
128
129 @param target: target protein, containing fragment assignments
130 @type target: L{Target}
131 @param threshold: RMSD distance threshold for L{FragmentCluster}-based filtering
132 @type threshold: float
133 @param extend: pick alternative, longer cluster reps, if possible
134 @type extend: bool
135 @param init: populate all L{FragmentCluster}s on instantiation. If False, this step
136 will be performed on demand (the first time C{predictor.compute()} is invoked)
137
138 @note: if C{init} is False, the first call to C{predictor.compute()} might take a long
139 time. Subsequent calls will be very fast.
140 """
141
def __init__(self, target, threshold=1.5, extend=False, init=False):
    """
    Validate C{target}, store the filtering settings and start with empty
    caches; optionally populate them right away.

    @param target: target protein, containing fragment assignments
    @type target: L{Target}
    @param threshold: stored as a float in C{self._threshold}
    @type threshold: float
    @param extend: stored as a bool in C{self._extend}
    @type extend: bool
    @param init: if True, call C{self.init()} before returning
    @type init: bool

    @raise TypeError: if C{target} is not a L{Target}
    @raise ValueError: if C{target.matches} is empty
    """
    if not isinstance(target, Target):
        raise TypeError(target)

    assigned = target.matches.length
    if assigned == 0:
        raise ValueError('This target has no fragment assignments')

    # Nothing is computed yet; these start out empty.
    self._initialized = False
    self._reps = {}
    self._clusters = {}

    self._threshold = float(threshold)
    self._extend = bool(extend)
    self._target = target

    if init:
        self.init()
159
160 @property
163
164 @property
166 return self._threshold
167
168 @property
171
192
208
216
239
272
274 """
275 Filter the current fragment map and create a new, completely flat,
276 non-overlapping map built from centroids, assigned iteratively by
277 decreasing confidence. Centroids with lower confidence which overlap
278 with previously assigned centroids will be trimmed to fill existing
279 gaps only.
280
281 @return: L{TorsionPredictionInfo} instances, one for each target residue
282 @rtype: tuple of L{TorsionPredictionInfo}
283 """
284
285 if not self._initialized:
286 self.init()
287
288 prediction = []
289 slots = set(range(1, self.target.length + 1))
290
291 reps = list(self._reps.values())
292 reps.sort(key=lambda i: i.confidence, reverse=True)
293
294 for rep in reps:
295
296 for rank in range(rep.centroid.qstart, rep.centroid.qend + 1):
297 if rank in slots:
298 torsion = rep.centroid.torsion_at(rank, rank)[0]
299 ss = rep.centroid.sec_structure_at(rank, rank)[0]
300 info = TorsionPredictionInfo(rank, rep.confidence, torsion, ss, primary=True)
301
302 prediction.append(info)
303 slots.remove(rank)
304
305 for rank in slots:
306 prediction.append(TorsionPredictionInfo(rank, 0, None))
307
308 prediction.sort(key=lambda i: i.rank)
309 return tuple(prediction)
310
312 """
313 Extract all torsion angles coming from all fragments, which had survived
314 the filtering and cover residue C{#rank}.
315
316 @param rank: target residue rank
317 @type rank: int
318
319 @return: all L{TorsionAngles} for a cluster at the specified residue
320 @rtype: tuple of L{TorsionAngles}
321 """
322
323 if not self._initialized:
324 self.init()
325 if rank not in self._clusters:
326 return tuple()
327
328 angles = []
329
330 for node in self._clusters[rank]:
331 fragment = node.fragment
332 torsion = fragment.torsion_at(rank, rank)[0]
333 angles.append(torsion)
334
335 return tuple(angles)
336
339 """
340 Struct container for a single torsion angle prediction.
341
342 @param rank: target residue rank
343 @type rank: int
344 @param confidence: confidence of prediction
345 @type confidence: float
346 @param torsion: assigned phi/psi/omega angles
347 @type torsion: L{TorsionAngles}
348 @param dssp: assigned secondary structure
349 @type dssp: L{SecondaryStructureElement}
350 @param primary: if True, designates that the assigned angles are extracted
351 from the L{ClusterRep} at residue C{#rank}; otherwise: the
352 angles are coming from another, overlapping L{ClusterRep}
353
354 """
355
356 - def __init__(self, rank, confidence, torsion, dssp, primary=False):
363
370
372 return '<TorsionPredictionInfo: {0.confidence:6.3f} at #{0.rank}>'.format(self)
373
376
388
399
400 -class Target(csb.core.AbstractNIContainer):
401 """
402 Represents a protein structure prediction target.
403
404 @param id: target sequence ID, in PDB accnC format
405 @type id: str
406 @param length: total target sequence length
407 @type length: int
408 @param residues: a list, containing target's residues. See also
409 L{Target.from_sequence}
410 @type residues: iterable of L{csb.bio.structure.ProteinResidue}s
411 """
412
432
433 @staticmethod
454
455 @staticmethod
468
469 @staticmethod
474
475 @property
477 return self._residues
478
479 @property
482
483 @property
486
487 @property
489 return self._accession
490
491 @property
493 return self._chain_id
494
495 @property
498
499 @property
502
503 @property
506
507 @property
509 return self._assignments
510
511 @property
513 return self._residues
514
515 @property
517 return self._segments
518
540
542 """
543 Assign a bunch of fragments at once.
544 @type fragments: iterable of L{Assignment}s
545 """
546 for frag in fragments:
547 self.assign(frag)
548
def filter(self, threshold=1.5, extend=False):
    """
    Build a copy of this target that holds only cluster representatives.

    Every residue of C{self} is filtered with the given settings; whenever a
    residue yields a representative, its centroid is assigned to the copy.

    @param threshold: cluster RMSD threshold (see L{FragmentCluster})
    @type threshold: float
    @param extend: pick extended alternatives where possible (default=False)
    @type extend: bool

    @return: a new target, containing only cluster centroids/reps
    @rtype: L{Target}
    """
    filtered = self.clone()

    # Lazy generator: each residue is filtered right before its rep is used.
    reps = (residue.filter(threshold=threshold, extend=extend)
            for residue in self.residues)

    for rep in reps:
        if rep is None:
            continue
        filtered.assign(rep.centroid)

    return filtered
571
585
587
588 - def __init__(self, id, length, residues, overlap=None):
592
603
607
609 """
610 Wrapper around L{Target}'s native residues. Decorates them with additional,
611 fragment-related methods.
612
613 @type native_residue: L{csb.bio.structure.ProteinResidue}
614 """
615
621
622 @property
625
626 @property
629
630 @property
632 return self._assignments
633
def assign(self, assignment_info):
    """
    Append C{assignment_info} to this object's assignment collection.

    @param assignment_info: the item to store; passed unchanged to the
                            collection's C{_append_item}
    """
    collection = self._assignments
    collection._append_item(assignment_info)
636
655
657 """
658 Filter all fragments, covering this position in the L{Target} using a
659 L{FragmentCluster}.
660
661 @param method: one of the L{Metrics} members (default=L{Metrics.RMSD})
662 @type method: str
663 @param threshold: cluster RMSD threshold (see L{FragmentCluster})
664 @type threshold: float
665 @param extend: pick extended alternative where possible (default=False)
666 @type extend: bool
667
668 @return: cluster's representative (if converged) or None
669 @rtype: L{ClusterRep} or None
670 """
671
672 try:
673 nodes = []
674 for ai in self.assignments:
675 node = ClusterNode.create(ai.fragment, method, extend)
676 nodes.append(node)
677
678 cluster = FragmentCluster(nodes, threshold=threshold)
679
680 center = cluster.shrink(minitems=0)
681 if center.has_alternative:
682 center.exchange()
683
684 return center
685
686 except (ClusterExhaustedError, ClusterDivergingError):
687 return None
688
701
703 """
704 @return: the residue-wise precision of the fragment library at the
705 current position (percentage).
706
707 @param threshold: true-positive RMSD cutoff (default=1.5)
708 @type threshold: float
709 @rtype: float
710 """
711
712 if self.assignments.length < 1:
713 return None
714 else:
715 positive = [a for a in self.assignments if a.fragment.rmsd <= threshold]
716 pos = len(positive) * 100.0 / self.assignments.length
717
718 return pos
719
738
875
906
908
909 binsize = float(binsize)
910 bins = numpy.ceil(numpy.array(data) / binsize)
911
912 hist = dict.fromkeys(bins, 0)
913 for bin in bins:
914 hist[bin] += (1.0 / len(bins))
915
916 freq = numpy.array(hist.values())
917 return - numpy.sum(freq * numpy.log(freq))
918
920
921 rmsds = self.pairwise_rmsd()
922 return self._entropy(rmsds, binsize)
923
928
930
931 rmsds = self.pairwise_rmsd()
932
933 if len(rmsds) < 1:
934 return None
935
936 return sum([1 for i in rmsds if i <= threshold]) / float(len(rmsds))
937
939
940 sa_rmsds = self.pairwise_sa_rmsd(profiles=profiles)
941
942 if len(sa_rmsds) < 1:
943 return None
944
945 return sum([1 for i in sa_rmsds if i <= threshold]) / float(len(sa_rmsds))
946
953
955
956 cons = self.rmsd_consistency()
957
958 if cons is None:
959 return 0
960 else:
961 return numpy.log10(self.count) * cons
962
964
973
974 @property
976 return self._assignment.backbone[self._relrank]
977
978 @property
980 return self._assignment
981
983 """
984 Represents a match between a fragment and its target.
985
986 @param source: source structure (must have torsion angles precomputed)
987 @type source: L{csb.bio.structure.Chain}
988 @param start: start position in C{source} (rank)
989 @type start: int
990 @param end: end position in C{source} (rank)
991 @type end: int
992 @param id: fragment ID
993 @type id: str
994 @param qstart: start position in target (rank)
995 @type qstart: int
996 @param qend: end position in target (rank)
997 @type qend: int
998 @param probability: probability of assignment
999 @type probability: float
1000 @param rmsd: RMSD of the fragment, compared to target's native structure
1001 @type rmsd: float
1002 """
1003
def __init__(self, source, start, end, qstart, qend, id=None, probability=None, rmsd=None,
             tm_score=None, score=None, neff=None, segment=None, internal_id=None):
    """
    Copy the CA coordinates, torsion angles and sequence of the region
    C{start}..C{end} of C{source}, then initialise the base match.

    @param source: source structure (must have torsion angles precomputed)
    @type source: L{csb.bio.structure.Chain}
    @param start: start position in C{source} (rank)
    @param end: end position in C{source} (rank)
    @param qstart: start position in target (rank)
    @param qend: end position in target (rank)
    @param id: fragment ID; if None, "source_id:start-end" is used
    @param probability: passed on to the base class
    @param rmsd: passed on to the base class
    @param tm_score: passed on to the base class
    @param score: stored as C{self._score}
    @param neff: stored as C{self._neff}
    @param segment: stored as C{self._segment_start}
    @param internal_id: stored as C{self.internal_id}

    @raise csb.bio.structure.Broken3DStructureError: if a residue in the
           region has no CA atom
    """
    assert source.has_torsion

    region = source.subregion(start, end, clone=True)

    calpha = []
    try:
        for residue in region.residues:
            calpha.append(residue.atoms['CA'].vector.copy())
    except csb.core.ItemNotFoundError:
        raise csb.bio.structure.Broken3DStructureError()

    torsion = []
    for residue in region.residues:
        torsion.append(residue.torsion.copy())

    self._calpha = csb.core.ReadOnlyCollectionContainer(items=calpha, type=numpy.ndarray)
    self._torsion = torsion
    self._sequence = region.sequence

    # These three must be set before the default ID is formatted below.
    self._source_id = source.accession[:4] + source.id
    self._start = start
    self._end = end

    self._score = score
    self._neff = neff
    self._ss = None

    self._segment_start = segment
    self.internal_id = internal_id

    if id is None:
        id = "{0}:{1}-{2}".format(self.source_id, self.start, self.end)

    super(Assignment, self).__init__(id, qstart, qend, probability, rmsd, tm_score, None)

    # self.length is only used after the base initialiser has run.
    placeholder = '-' * self.length
    self._ss = SecondaryStructure(placeholder)
1036
1037 @staticmethod
1060
1061 @property
1064
1065 @property
1067 return self._sequence
1068
1069 @property
1071 return self._torsion
1072
1073 @property
1075 return self._source_id
1076
1077 @property
1080
1081 @property
1084
1085 @property
1088
1089 @property
1092
1093 @property
1095 return self._segment_start
1096
1097 @property
1100 @secondary_structure.setter
1109
1119
1124
1126 """
1127 @return: True if the fragment is centered around position=C{rank}.
1128 @rtype: bool
1129 """
1130
1131 if self.qstart < rank < self.qend:
1132 if (rank - self.qstart + 1) > 0.4 * (self.qend - self.qstart + 1):
1133 return True
1134
1135 return False
1136
1138 """
1139 @return: the CA coordinates of the fragment at the specified subregion.
1140 @rtype: list
1141 """
1142
1143 self._check_range(qstart, qend)
1144
1145 relstart = qstart - self.qstart
1146 relend = qend - self.qstart + 1
1147
1148 return self.backbone[relstart : relend]
1149
1151 """
1152 @return: the torsion angles of the fragment at the specified subregion.
1153 @rtype: list
1154 """
1155
1156 self._check_range(qstart, qend)
1157
1158 relstart = qstart - self.qstart
1159 relend = qend - self.qstart + 1
1160
1161 return self.torsion[relstart : relend]
1162
1164
1165 self._check_range(qstart, qend)
1166
1167 relstart = qstart - self.qstart
1168 relend = qend - self.qstart + 1
1169
1170 return sa_string[relstart : relend]
1171
1179
1191
1192 - def chain_at(self, source, qstart, qend):
1200
1202 """
1203 @type other: L{Assignment}
1204 @return: target positions, covered by both C{self} and C{other}
1205 @rtype: set of int
1206 """
1207
1208 qranks = set(range(self.qstart, self.qend + 1))
1209 sranks = set(range(other.qstart, other.qend + 1))
1210
1211 return qranks.intersection(sranks)
1212
def rmsd_to(self, other, min_overlap=5):
    """
    Compute the CA RMSD between C{self} and C{other} over the target region
    spanned by their common positions.

    @param other: another fragment
    @type other: L{Assignment}
    @param min_overlap: require at least that number of overlapping residues
                        (return None if not satisfied)
    @type min_overlap: int

    @return: 0 if C{other} is C{self}; the RMSD if the overlap is large
             enough and both backbones are non-empty; None otherwise
    @rtype: float
    """
    if self is other:
        return 0

    shared = self.overlap(other)
    if len(shared) < min_overlap:
        return None

    first, last = min(shared), max(shared)
    mine = self.backbone_at(first, last)
    theirs = other.backbone_at(first, last)

    if len(mine) == 0 or len(theirs) == 0:
        return None

    return csb.bio.utils.rmsd(numpy.array(mine), numpy.array(theirs))
1241
1242 - def nrmsd_to(self, other, min_overlap=5):
1257
def mda_to(self, other, min_overlap=5):
    """
    Compute the largest absolute phi or psi difference between C{self} and
    C{other} over the target region spanned by their common positions.

    The phi of the first and the psi of the last overlapping residue are
    left out of the comparison (NOTE(review): presumably because terminal
    angles may be undefined -- confirm).

    @param other: another fragment
    @type other: L{Assignment}
    @param min_overlap: require at least that number of overlapping residues
                        (return None if not satisfied)
    @type min_overlap: int

    @return: the maximum deviation, or None if the overlap is too small,
             either torsion list is empty, or fewer than two residue pairs
             are available
    @rtype: float
    """
    common = self.overlap(other)
    if len(common) < min_overlap:
        return None

    qstart, qend = min(common), max(common)
    q = self.torsion_at(qstart, qend)
    s = other.torsion_at(qstart, qend)

    if len(q) == 0 or len(s) == 0:
        return None

    # zip() is a lazy iterator on Python 3 and cannot be sliced directly.
    pairs = list(zip(q, s))

    # With a single pair both slices below are empty and max() would fail.
    if len(pairs) < 2:
        return None

    maxphi = max(numpy.abs(i.phi - j.phi) for i, j in pairs[1:])
    maxpsi = max(numpy.abs(i.psi - j.psi) for i, j in pairs[:-1])

    return max(maxphi, maxpsi)
1277
def to_rosetta(self, source, qstart=None, qend=None, weight=None):
    """
    Render the fragment (or a subregion of it) as text, one line per residue.

    @deprecated: this method will be deleted soon. Use
                 L{csb.bio.fragments.rosetta.OutputBuilder} instead.

    @param source: structure the residues are taken from; its torsion angles
                   are recomputed via C{source.compute_torsion()}
    @param qstart: start position in target; falls back to C{self.qstart}
                   when falsy
    @param qend: end position in target; falls back to C{self.qend} when falsy
    @param weight: value of the last column; defaults to C{self.probability}

    @return: the formatted lines
    @rtype: str
    """
    stream = csb.io.MemoryStream()
    row = ' {0:4} {1:1} {2:>5} {3!s:1} {4!s:1} {5:>8.3f} {6:>8.3f} {7:>8.3f} {8:>8.3f}\n'

    if weight is None:
        weight = self.probability
    qstart = qstart or self.qstart
    qend = qend or self.qend

    source.compute_torsion()
    chain = self.chain_at(source, qstart, qend)

    for offset, residue in enumerate(chain.residues):

        accession = self.source_id[:4]
        chain_id = self.source_id[4].upper()

        # source rank of the current residue
        rank = qstart - self.qstart + self.start + offset

        # missing (falsy) angles are written as 0
        angles = residue.torsion
        phi = angles.phi or 0
        psi = angles.psi or 0
        omega = angles.omega or 0

        # the secondary structure column is always written as 'L'
        stream.write(row.format(accession, chain_id, rank, residue.type, 'L',
                                phi, psi, omega, weight))

    return stream.getvalue()
1314
1316
1317 BIT_SCORE_THRESHOLD = 1.1
1318
def __init__(self, source, start, end, qstart, qend, window, score, rmsd):
    """
    Store C{window} and initialise the parent assignment with a fixed
    probability of 1.0 and no id, TM-score, neff, segment or internal id.

    @param source: passed on to the parent initialiser
    @param start: passed on to the parent initialiser
    @param end: passed on to the parent initialiser
    @param qstart: passed on to the parent initialiser
    @param qend: passed on to the parent initialiser
    @param window: stored as C{self._window}
    @param score: passed on to the parent initialiser
    @param rmsd: passed on to the parent initialiser
    """
    self._window = window

    fixed = dict(id=None, probability=1.0, rmsd=rmsd, tm_score=None,
                 score=score, neff=None, segment=None, internal_id=None)

    super(ChemShiftAssignment, self).__init__(source, start, end, qstart, qend, **fixed)
1326
1327 @property
1330
1333
1336
1339
1341 """
1342 Provides clustering/filtering of the fragments, covering a common residue
1343 in the target. Clustering is done via iterative shrinking of the cluster.
1344 At each iteration, node rejection (deletion) is attempted for each node. The
1345 node rejection, causing the most significant drop in the average pairwise
1346 distance (RMSD) in the cluster, is retained. This procedure is repeated
1347 until: 1) the average pairwise RMSD drops below the C{threshold} (converged),
1348 2) the cluster gets exhausted or 3) node rejection no longer
1349 causes a drop in the average distance (not converging).
1350
1351 @param items: cluster members
1352 @type items: iterable of L{ClusterNode}s
1353 @param threshold: RMSD threshold; continue shrinking until the mean distance
1354 drops below this value (default=1.5)
1355 @type threshold: float
1356 @param connectedness: when calculating centroids, consider only nodes
1357 connected to at least c% of all surviving vertices
1358 (default=0.5)
1359 @type connectedness: float
1360 """
1361
1362 MIN_LENGTH = 6
1363
1364 - def __init__(self, items, threshold=1.5, connectedness=0.5):
1401
1402 @property
1404 return len(self._items)
1405
1406 @property
1408 return tuple(self._items)
1409
1410 @property
1412 return tuple(i.fragment for i in self._items)
1413
1414 @property
1416 return self._threshold
1417 @threshold.setter
1419 self._threshold = float(value)
1420
1421 @property
1423 return self._connectedness
1424
1426
1427 d = []
1428
1429 for i in self._matrix:
1430 if skip is i:
1431 continue
1432
1433 for j in self._matrix[i]:
1434 if skip is not j:
1435 d.append(self._matrix[i][j])
1436
1437 return d
1438
1440
1441 if j in self._matrix[i]:
1442 return self._matrix[i][j]
1443 else:
1444 return None
1445
def mean(self, skip=None):
    """
    Compute the mean distance in the cluster, optionally as it would be
    without the node C{skip}.

    @param skip: node to leave out; its weight and its row of the distance
                 matrix are subtracted from the totals
    @return: total weight divided by number of edges; 0 if leaving out
             C{skip} leaves less than one edge
    @rtype: float
    @raise ClusterExhaustedError: if the cluster currently has no edges
    """
    if self._edges == 0:
        raise ClusterExhaustedError()

    if skip:
        remaining_weight = self._weight - skip.weight
        remaining_edges = self._edges - len(self._matrix[skip])

        if remaining_edges < 1:
            return 0
        return float(remaining_weight) / remaining_edges

    return float(self._weight) / self._edges
1465
1467 """
1468 @return: the current representative fragment
1469 @rtype: L{ClusterRep}
1470
1471 @note: the cluster rep is the node with the lowest average distance
1472 to all other nodes. If a fixed fragment exists, structurally similar
1473 to the rep, but longer, this fragment may be suggested as an alternative
1474 (see also L{ClusterRep}).
1475 """
1476
1477 alt = None
1478 cen = None
1479 avg = None
1480
1481 for i in self._matrix:
1482 edges = len(self._matrix[i]) or (1.0 / self.count)
1483 curravg = float(i.weight) / edges
1484 conn = len(self._matrix[i]) / float(self.count)
1485
1486 if avg is None or (curravg < avg and conn >= self.connectedness):
1487 avg = curravg
1488 cen = i
1489 elif curravg == avg:
1490 if i.fragment.length > cen.fragment.length:
1491 cen = i
1492
1493 d = self._distances()
1494 mean = numpy.mean(d)
1495 cons = sum(1.0 for i in d if i <= self.threshold) / len(d)
1496
1497 for i in self._matrix:
1498 if i is not cen and i.fixed and i.fragment.length > cen.fragment.length:
1499 distance = self._distance(i, cen)
1500 if distance is not None and distance < 0.5 * self.threshold:
1501 if alt is None or alt.fragment.length < i.fragment.length:
1502 alt = i
1503
1504 return ClusterRep(cen, mean, cons, len(self._matrix[cen]), alternative=alt,
1505 rejections=(self._initcount - self.count))
1506
1508 """
1509 Remove C{item} from the cluster.
1510
1511 @type item: L{ClusterNode}
1512 @raise ClusterExhaustedError: if this is the last remaining item
1513 """
1514 if self.count == 1:
1515 raise ClusterExhaustedError()
1516
1517 assert not item.fixed
1518
1519 for i in self._matrix:
1520 if item in self._matrix[i]:
1521 distance = self._matrix[i][item]
1522 self._weight -= distance
1523 i.weight -= distance
1524
1525 del self._matrix[i][item]
1526 self._edges -= 1
1527
1528 del self._matrix[item]
1529 self._items.remove(item)
1530
1532 """
1533 Shrink the cluster by a single node.
1534
1535 @return: True on successful shrink, False otherwise (e.g. if
1536 already converged)
1537 @rtype: bool
1538 @raise ClusterExhaustedError: if exhausted
1539 @raise ClusterDivergingError: if not converging
1540 """
1541
1542 mean = self.mean()
1543 if mean <= self.threshold or self.count == 1:
1544 return False
1545
1546 m = {}
1547
1548 for i in self._matrix:
1549 if not i.fixed:
1550 newmean = self.mean(skip=i)
1551 m[newmean] = i
1552
1553 if len(m) == 0:
1554 raise ClusterExhaustedError()
1555
1556 newmean = min(m)
1557
1558 if newmean > mean:
1559 raise ClusterDivergingError()
1560 elif newmean < mean:
1561 junk = m[newmean]
1562 self.reject(junk)
1563 return True
1564 else:
1565 return False
1566
def shrink(self, minitems=2):
    """
    Keep shrinking the cluster one node at a time until C{shrinkone}
    reports that no further shrink was made.

    @param minitems: absolute minimum of the number of nodes in the cluster
    @type minitems: int

    @return: cluster's representative, as computed by C{self.centroid()}
    @rtype: L{ClusterRep}

    @raise ClusterExhaustedError: if the cluster has no more than C{minitems}
                                  nodes on entry or after a successful shrink
    """
    if self.count <= minitems:
        raise ClusterExhaustedError()

    while self.shrinkone():
        if self.count <= minitems:
            raise ClusterExhaustedError()

    return self.centroid()
1591
1593 """
1594 Cluster node.
1595
1596 @param fragment: fragment
1597 @type fragment: L{Assignment}
1598 @param distance: distance metric (a L{Metrics} member, default is RMSD)
1599 @type distance: str
1600 @param fixed: mark this node as fixed (cannot be rejected)
1601 @type fixed: bool
1602 """
1603
1604 FIXED = 0.7
1605
1606 @staticmethod
1618
1629
1631 """
1632 @return: the distance between self and another node
1633 @type other: L{ClusterNode}
1634 @rtype: float
1635 """
1636 return self._distance(other.fragment)
1637
1639 """
1640 Cluster's representative (centroid) node. This object carries the
1641 result of shrinking itself.
1642
1643 @param centroid: rep node
1644 @type centroid: L{ClusterNode}
1645 @param mean: current mean distance in the cluster
1646 @type mean: float
1647 @param consistency: percentage of pairwise distances below the RMSD C{threshold}
1648 @type consistency: float
1649 @param count: current number of nodes in the cluster
1650 @type count: int
1651 @param rejections: total number of rejections
1652 @type rejections: int
1653 @param alternative: suggested cluster rep alternative (e.g. structurally
1654 similar to the centroid, but longer)
1655 @type alternative: L{ClusterNode}
1656 """
1657
1658 - def __init__(self, centroid, mean, consistency, count, rejections=0, alternative=None):
1671
1672 @property
1674 """
1675 Confidence of assignment: log10(count) * consistency
1676 """
1677 if self.count <= 0 or self.count is None or self.consistency is None:
1678 return 0
1679 else:
1680 return numpy.log10(self.count) * self.consistency
1681
1682 @property
1684 return self._centroid
1685
1686 @property
1688 return self._alternative
1689
1690 @property
1692 return self._alternative is not None
1693
1694 @property
1697
1698 @property
1700 return self._consistency
1701
1702 @property
1705
1706 @property
1708 return self._rejections
1709
1711 """
1712 If an alternative is available, swap the centroid and the alternative.
1713 """
1714
1715 if self._alternative is not None:
1716
1717 centroid = self._centroid
1718 self._centroid = self._alternative
1719 self._alternative = centroid
1720
1726
1728
1729 @staticmethod
1743
1744 - def __init__(self, center, qstart, qend):
1754
1755 @property
1758
1759 @property
1762
1763 @property
1766
1767 @property
1770
1771 @property
1774
1775 @property
1778
1779 - def chain(self, source):
1781
1784
1840
1843
1844 - def __init__(self, residue, confidence=0, count=0, confident=True, gap=False, rep=None):
1852
1853 @property
1856
1857 @property
1860
1861 @property
1863 if self.rep:
1864 return self.rep.torsion_at(self.rank, self.rank)[0]
1865 else:
1866 return None
1867
1870 """
1871 Simplifies the construction of fragment libraries.
1872 """
1873
1877
1894
1896 """
1897 Build a fixed-length fragment library from a list of
1898 variable-length L{Assignment}s.
1899
1900 @param fragments: source fragments
1901 @type fragments: iterable of L{RosettaFragment}s
1902 @param window: fixed-length fragment size (for classic Rosetta: choose 9)
1903 @type window: int
1904
1905 @return: fixed-length fragment library
1906 @rtype: L{RosettaFragmentMap}
1907 """
1908
1909 frags = []
1910
1911 for f in fragments:
1912 for qs in range(f.qstart, f.qend - window + 1):
1913 frags.append(f.subregion(qs, qs + window - 1))
1914
1915 return self.rosetta.RosettaFragmentMap(frags)
1916
def make_combined(self, target, filling, threshold=0.5, callback=None):
    """
    Complement C{target}'s assignments with C{filling} (e.g. rosetta fragments).

    Each residue with assignments is filtered; residues whose cluster
    confidence reaches C{threshold} count as covered. All other residues are
    marked unconfident in the fragment map, which is then complemented with
    every fragment from C{filling}.

    @param target: target protein
    @type target: L{Target}
    @param filling: additional fragments to place in the low-conf regions
    @type filling: L{RosettaFragmentMap} or iterable of L{RosettaFragment}
    @param threshold: confidence threshold
    @type threshold: float
    @param callback: optional callable, invoked with one L{ResidueEventInfo}
                     per target residue

    @return: complemented fragment library
    @rtype: L{RosettaFragmentMap}
    """
    library = self.make_fragset(target)
    confident_ranks = set()

    for residue in target.residues:

        # no fragments at all: report a gap
        if residue.assignments.length == 0:
            if callback:
                callback(ResidueEventInfo(residue.native, gap=True))
            continue

        # filtering produced no representative
        rep = residue.filter()
        if rep is None:
            if callback:
                callback(ResidueEventInfo(residue.native, 0, 0, confident=False))
            continue

        is_confident = bool(rep.confidence >= threshold)
        if is_confident:
            confident_ranks.add(residue.native.rank)

        if callback:
            callback(ResidueEventInfo(residue.native, rep.confidence, rep.count, is_confident))

    for residue in target.residues:
        if residue.native.rank in confident_ranks:
            continue
        library.mark_unconfident(residue.native.rank)

    for fragment in filling:
        library.complement(fragment)

    return library
1968
1970 """
1971 Build a filtered fragment library (by clustering), containing only
1972 representative fragments (cluster centroids).
1973
1974 @param target: target protein
1975 @type target: L{Target}
1976 @param extend: if True, pick alternative reps if available
1977 @type extend: bool
1978
1979 @return: filtered fragment library
1980 @rtype: L{RosettaFragmentMap}
1981 """
1982
1983 fragments = []
1984
1985 for r in target.residues:
1986 if r.assignments.length == 0:
1987 if callback:
1988 callback(ResidueEventInfo(r.native, gap=True))
1989 continue
1990
1991 cluster = r.filter(extend=extend)
1992 if cluster is None:
1993 if callback:
1994 callback(ResidueEventInfo(r.native, 0, 0, confident=False))
1995 continue
1996
1997 if extend and cluster.has_alternative:
1998 best = cluster.alternative
1999 else:
2000 best = cluster.centroid
2001
2002 fragment = self.rosetta.RosettaFragment.from_object(best)
2003 fragments.append(fragment)
2004 if callback:
2005 callback(ResidueEventInfo(r.native, cluster.confidence, cluster.count, rep=cluster.centroid))
2006
2007 fragments.sort()
2008 return self.rosetta.RosettaFragmentMap(fragments, target.length)
2009
2010 - def mix(self, *fragsets):
2030
2033
2035
2036 FACTORY = None
2037 DSN = None
2038
def __init__(self, factory=None, dsn=None):
    """
    Remember how to connect; no connection is opened here.

    @param factory: callable stored as C{self.factory}; falls back to the
                    class-level C{FACTORY} when falsy
    @param dsn: value stored as C{self.cs}; falls back to the class-level
                C{DSN} when falsy
    """
    defaults = self.__class__

    self.factory = factory if factory else defaults.FACTORY
    self.cs = dsn if dsn else defaults.DSN

    # both are set later, once a connection is actually opened
    self.connection = self.cursor = None
2045
2047
2048 self.connection = self.factory(self.cs)
2049 try:
2050 self.cursor = self.connection.cursor()
2051 except:
2052 self.connection.close()
2053 raise
2054 return self
2055
2057 try:
2058 if not self.cursor.closed:
2059 self.cursor.close()
2060 finally:
2061 if not self.connection.closed:
2062 self.connection.close()
2063
2084
2085 @staticmethod
2086 - def connection_string(database='FragmentBenchmarks', host='', username='', password=''):
2097
2104
2111
2118
2125
2126 - def scores(self, benchmark_id, type):
2132
2139
2148
def structure(self, accession, chain=None):
    """
    Locate and parse the structure file for C{accession}.

    The file is looked up by C{accession} first; if that fails and a
    C{chain} is given, C{accession + chain} is tried as well.

    @param accession: structure accession to look up in C{self._pdb}
    @param chain: optional chain ID used for the second lookup

    @return: the result of C{parse_structure()} on the located file
    @raise IOError: if neither lookup finds a file
    """
    pdbfile = self._find(accession, self._pdb)

    if chain and not pdbfile:
        pdbfile = self._find(accession + chain, self._pdb)

    if pdbfile:
        return self._parser(pdbfile).parse_structure()

    raise IOError('{0} not found here: {1}'.format(accession, self._pdb))
2160
def prediction(self, target_id, type, ss=False):
    """
    Rebuild a target and all of its fragment assignments of the given type.

    @param target_id: ID of the target, as accepted by C{self.target_details}
    @param type: assignment type, as accepted by C{self.assignments}
    @param ss: if True, also call C{self._attach_sec_structure} on the result
    @type ss: bool

    @return: the target produced by C{self._factory.target}, with one
             assignment per row whose source structure could be loaded
    @raise ValueError: if C{self.target_details} returns nothing for
                       C{target_id}

    @note: rows whose source structure fails to load (IOError or ValueError)
           are skipped; the exception is appended to C{target.errors}.
    """
    details = self.target_details(target_id)
    if not details:
        raise ValueError('No such Target ID in the database: {0}'.format(target_id))
    record = details[0]

    target_name = record["Accession"]
    length = float(record["Length"])
    overlap = float(record["MaxOverlap"]) / (length or 1.)

    # first 4 characters: accession; 5th character: chain ID
    accession, chain_id = target_name[:4], target_name[4]
    native = self.structure(accession, chain_id).chains[chain_id]
    segments = self.target_segments(target_id)
    target = self._factory.target(target_name, length, native.residues, overlap, segments)

    source = None

    for row in self.assignments(target_id, type):

        src_accession = row['Source'][:4]
        src_chain = row['Source'][4]

        # reload only when the accession differs from the last loaded one
        if source is None or source.accession != src_accession:
            try:
                source = self.structure(src_accession, src_chain)
            except (IOError, ValueError) as ex:
                target.errors.append(ex)
                continue

        # '_' selects the first chain of the source structure
        frag_chain = source.first_chain if src_chain == '_' else source.chains[src_chain]

        if not frag_chain.has_torsion:
            frag_chain.compute_torsion()

        fields = {
            'source': frag_chain,
            'start': row['SourceStart'],
            'end': row['SourceEnd'],
            'id': row['FragmentName'],
            'qstart': row['Start'],
            'qend': row['End'],
            'probability': row['Probability'],
            'score': row['Score'],
            'neff': row['Neff'],
            'rmsd': row['RMSD'],
            'tm_score': row['TMScore'],
            'segment': row['SegmentStart'],
            'internal_id': row['InternalID'],
        }
        target.assign(self._factory.assignment(**fields))

    if ss:
        self._attach_sec_structure(target, target_id, type)

    return target
2218
2234