1 """
2 APIs for working with protein structure fragments and libraries.
3
4 This package contains the nuts and bolts of HHfrag. Everything here revolves
5 around the L{Target} class, which describes a protein structure prediction
6 target. One typically assigns fragments (L{Assignment}s) to the target and then
7 builds a fragment library with L{RosettaFragsetFactory}.
8
9 @note: Internal or legacy objects are intentionally left undocumented.
10 This typically indicates experimental code.
11 """
12
13 import os
14 import numpy
15
16 import csb.io
17 import csb.core
18 import csb.bio.utils
19 import csb.bio.structure
20 import csb.bio.sequence
30
36
# Lookup table of reference RMSD values, keyed by an integer in the range 5..54.
# NOTE(review): presumably the key is a fragment length and the value is the
# expected RMSD between random fragments of that length -- confirm against the
# code that consumes this table.
RANDOM_RMSD = {
    5: 1.8749005857255376,
    6: 2.4314283686276261,
    7: 2.9021135267789608,
    8: 3.2477716200172715,
    9: 3.5469606556031708,
    10: 3.8295465524456329,
    11: 4.1343107114131783,
    12: 4.3761697929053014,
    13: 4.6707299668248394,
    14: 4.9379016881069733,
    15: 5.1809028645084911,
    16: 5.4146957142595662,
    17: 5.7135948448156988,
    18: 5.9597935432566782,
    19: 6.1337340535741962,
    20: 6.3962825155503271,
    21: 6.6107937773415166,
    22: 6.8099096274123401,
    23: 7.0435583846849639,
    24: 7.2160956482560970,
    25: 7.4547896324594962,
    26: 7.6431870072434211,
    27: 7.8727812194173836,
    28: 8.0727393298443637,
    29: 8.2551450998965326,
    30: 8.4413583511786587,
    31: 8.5958719774122052,
    32: 8.7730435506242408,
    33: 8.9970648837941649,
    34: 9.1566521405105163,
    35: 9.2828620878454728,
    36: 9.4525824357923405,
    37: 9.6322126445253300,
    38: 9.7851684750961176,
    39: 9.9891454649821476,
    40: 10.124373939352028,
    41: 10.284348528344765,
    42: 10.390457305096271,
    43: 10.565792044674239,
    44: 10.676532740033737,
    45: 10.789537132283652,
    46: 11.004475543757550,
    47: 11.064541647783571,
    48: 11.231219875286985,
    49: 11.319222637391441,
    50: 11.485478165340824,
    51: 11.607522494435521,
    52: 11.700268836069840,
    53: 11.831245255954073,
    54: 11.918975893263905,
}
48 """
49 Base class, representing a match between a fragment and its target.
50 """
51
52 - def __init__(self, id, qstart, qend, probability, rmsd, tm_score, qlength):
61
62 @property
65
66 @property
69
70 @property
73
74 @property
77
78 @property
81
82 @property
85
86 @property
88 return self._probability
89
90 @property
93
94 @property
96 raise NotImplementedError()
97
98 @property
100 raise NotImplementedError()
101
102 @property
104 raise NotImplementedError()
105
107
def __init__(self, target, isites_prediction, hmm_prediction, combined_prediction):
    """
    Keep a target together with three predictions made for it. All four
    arguments are stored unchanged as public attributes.

    @param target: the object exposed as C{self.target}
    @param isites_prediction: the object exposed as C{self.isites}
    @param hmm_prediction: the object exposed as C{self.hmm}
    @param combined_prediction: the object exposed as C{self.combined}
    """
    self.target = target

    self.isites, self.hmm, self.combined = (
        isites_prediction,
        hmm_prediction,
        combined_prediction,
    )
115
117
118 - def __init__(self, alignment, coordinates):
122
124 """
125 Fragment-based phi/psi angles predictor.
126
127 @param target: target protein, containing fragment assignments
128 @type target: L{Target}
129 @param threshold: RMSD distance threshold for L{FragmentCluster}-based filtering
130 @type threshold: float
131 @param extend: pick alternative, longer cluster reps, if possible
132 @type extend: bool
133 @param init: populate all L{FragmentCluster}s on instantiation. If False, this step
134 will be performed on demand (the first time C{predictor.compute()} is invoked)
135
136 @note: if C{init} is False, the first call to C{predictor.compute()} might take a long
137 time. Subsequent calls will be very fast.
138 """
139
def __init__(self, target, threshold=1.5, extend=False, init=False):
    """
    Validate the target and set up an empty predictor.

    @param target: target protein, containing fragment assignments
    @type target: L{Target}
    @param threshold: RMSD distance threshold (stored as float)
    @type threshold: float
    @param extend: pick alternative, longer cluster reps, if possible
                   (stored as bool)
    @type extend: bool
    @param init: if true, C{self.init()} is invoked right away
    @type init: bool

    @raise TypeError: if C{target} is not a L{Target}
    @raise ValueError: if C{target.matches} is empty
    """
    if not isinstance(target, Target):
        raise TypeError(target)

    if target.matches.length == 0:
        raise ValueError('This target has no fragment assignments')

    # normalize the settings before touching the instance state
    cutoff = float(threshold)
    use_extended = bool(extend)

    self._target = target
    self._threshold = cutoff
    self._extend = use_extended

    # nothing is computed yet
    self._initialized = False
    self._reps = {}

    if init:
        self.init()
156
157 @property
160
161 @property
163 return self._threshold
164
165 @property
168
183
191
213
246
248 """
249 Struct container for a single torsion angle prediction.
250
251 @param rank: target residue rank
252 @type rank: int
253 @param confidence: confidence of prediction
254 @type confidence: float
255 @param torsion: assigned phi/psi/omega angles
256 @type torsion: L{TorsionAngles}
257 @param primary: if True, designates that the assigned angles are extracted
258 from the L{ClusterRep} at residue C{#rank}; otherwise: the
259 angles are coming from another, overlapping L{ClusterRep}
260
261 """
262
263 - def __init__(self, rank, confidence, torsion, primary=False):
269
276
278 return '<TorsionPredictionInfo: {0.confidence:6.3f} at #{0.rank}>'.format(self)
279
282
294
295 -class Target(csb.core.AbstractNIContainer):
296 """
297 Represents a protein structure prediction target.
298
299 @param id: target sequence ID: PDB accession followed by the chain ID (e.g. 1abcA)
300 @type id: str
301 @param length: total target sequence length
302 @type length: int
303 @param residues: a list, containing target's residues. See also
304 L{Target.from_sequence}
305 @type residues: iterable of L{csb.bio.structure.ProteinResidue}s
306 """
307
327
328 @staticmethod
349
350 @staticmethod
363
364 @staticmethod
369
370 @property
372 return self._residues
373
374 @property
377
378 @property
381
382 @property
384 return self._accession
385
386 @property
388 return self._chain_id
389
390 @property
393
394 @property
397
398 @property
401
402 @property
404 return self._assignments
405
406 @property
408 return self._residues
409
410 @property
412 return self._segments
413
435
437 """
438 Assign a bunch of fragments at once.
439 @type fragments: iterable of L{Assignment}s
440 """
441 for frag in fragments:
442 self.assign(frag)
443
def filter(self, threshold=1.5, extend=False):
    """
    Build a filtered copy of this target: each residue is asked for its
    cluster representative and, whenever one is returned, the rep's
    centroid is assigned to a clone of this target.

    @param threshold: cluster RMSD threshold, passed on to each residue's
                      C{filter} (see L{FragmentCluster})
    @type threshold: float
    @param extend: pick extended alternatives where possible (default=False)
    @type extend: bool

    @return: a new target, containing only cluster centroids/reps
    @rtype: L{Target}
    """
    filtered = self.clone()

    # lazy: each residue is filtered right before its rep is assigned
    reps = (res.filter(threshold=threshold, extend=extend) for res in self.residues)

    for rep in reps:
        if rep is None:
            # this residue yielded no representative
            continue
        filtered.assign(rep.centroid)

    return filtered
466
480
483 """
484 Wrapper around L{Target}'s native residues. Decorates them with additional,
485 fragment-related methods.
486
487 @type native_residue: L{csb.bio.structure.ProteinResidue}
488 """
489
495
496 @property
499
500 @property
503
504 @property
506 return self._assignments
507
def assign(self, assignment_info):
    """
    Append C{assignment_info} to this residue's collection of assignments.

    @param assignment_info: the item to register at this residue
    """
    collection = self._assignments
    collection._append_item(assignment_info)
510
529
563
576
578 """
579 @return: the residue-wise precision of the fragment library at the
580 current position (percentage).
581
582 @param threshold: true-positive RMSD cutoff (default=1.5)
583 @type threshold: float
584 @rtype: float
585 """
586
587 if self.assignments.length < 1:
588 return None
589 else:
590 positive = [a for a in self.assignments if a.fragment.rmsd <= threshold]
591 pos = len(positive) * 100.0 / self.assignments.length
592
593 return pos
594
731
762
764
765 binsize = float(binsize)
766 bins = numpy.ceil(numpy.array(data) / binsize)
767
768 hist = dict.fromkeys(bins, 0)
769 for bin in bins:
770 hist[bin] += (1.0 / len(bins))
771
772 freq = numpy.array(hist.values())
773 return - numpy.sum(freq * numpy.log(freq))
774
776
777 rmsds = self.pairwise_rmsd()
778 return self._entropy(rmsds, binsize)
779
784
786
787 rmsds = self.pairwise_rmsd()
788
789 if len(rmsds) < 1:
790 return None
791
792 return sum([1 for i in rmsds if i <= threshold]) / float(len(rmsds))
793
795
796 sa_rmsds = self.pairwise_sa_rmsd(profiles=profiles)
797
798 if len(sa_rmsds) < 1:
799 return None
800
801 return sum([1 for i in sa_rmsds if i <= threshold]) / float(len(sa_rmsds))
802
809
811
812 cons = self.rmsd_consistency()
813
814 if cons is None:
815 return 0
816 else:
817 return numpy.log10(self.count) * cons
818
820
829
830 @property
832 return self._assignment.backbone[self._relrank]
833
834 @property
836 return self._assignment
837
839 """
840 Represents a match between a fragment and its target.
841
842 @param source: source structure (must have torsion angles precomputed)
843 @type source: L{csb.bio.structure.Chain}
844 @param start: start position in C{source} (rank)
845 @type start: int
846 @param end: end position in C{source} (rank)
847 @type end: int
848 @param id: fragment ID
849 @type id: str
850 @param qstart: start position in target (rank)
851 @type qstart: int
852 @param qend: end position in target (rank)
853 @type qend: int
854 @param probability: probability of assignment
855 @type probability: float
856 @param rmsd: RMSD of the fragment, compared to target's native structure
857 @type rmsd: float
858 """
859
def __init__(self, source, start, end, id, qstart, qend, probability, rmsd, tm_score=None,
             score=None, neff=None, segment=None, internal_id=None):
    """
    Cut the region C{start..end} out of C{source} and keep private copies
    of its CA coordinates, torsion angles and sequence.

    @param source: source structure (must have torsion angles precomputed)
    @type source: L{csb.bio.structure.Chain}
    @param start: start position in C{source} (rank)
    @type start: int
    @param end: end position in C{source} (rank)
    @type end: int
    @param id: fragment ID
    @type id: str
    @param qstart: start position in target (rank)
    @type qstart: int
    @param qend: end position in target (rank)
    @type qend: int
    @param probability: probability of assignment
    @type probability: float
    @param rmsd: RMSD of the fragment, compared to target's native structure
    @type rmsd: float
    @param tm_score: forwarded to the base class initializer
    @param score: stored as C{self._score}
    @param neff: stored as C{self._neff}
    @param segment: stored as C{self._segment_start}
    @param internal_id: stored as C{self.internal_id}

    @raise csb.bio.structure.Broken3DStructureError: if a residue in the
           region is missing its CA atom
    """
    assert source.has_torsion

    region = source.subregion(start, end, clone=True)

    ca_coords = []
    try:
        for residue in region.residues:
            ca_coords.append(residue.atoms['CA'].vector.copy())
    except csb.core.ItemNotFoundError:
        raise csb.bio.structure.Broken3DStructureError()

    angles = [residue.torsion.copy() for residue in region.residues]

    # structural payload, copied out of the source region
    self._calpha = csb.core.ReadOnlyCollectionContainer(items=ca_coords, type=numpy.ndarray)
    self._torsion = angles
    self._sequence = region.sequence

    # where the fragment comes from: first 4 accession chars + chain ID
    self._source_id = source.accession[:4] + source.id
    self._start = start
    self._end = end

    self._score = score
    self._neff = neff

    self._segment_start = segment
    self.internal_id = internal_id

    # the last positional argument of the base initializer is left as None
    super(Assignment, self).__init__(id, qstart, qend, probability, rmsd, tm_score, None)
886
887 @property
890
891 @property
893 return self._sequence
894
895 @property
898
899 @property
901 return self._source_id
902
903 @property
906
907 @property
910
911 @property
914
915 @property
918
919 @property
921 return self._segment_start
922
932
937
939 """
940 @return: True if the fragment is centered around position=C{rank}.
941 @rtype: bool
942 """
943
944 if self.qstart < rank < self.qend:
945 if (rank - self.qstart + 1) > 0.4 * (self.qend - self.qstart + 1):
946 return True
947
948 return False
949
951 """
952 @return: the CA coordinates of the fragment at the specified subregion.
953 @rtype: list
954 """
955
956 self._check_range(qstart, qend)
957
958 relstart = qstart - self.qstart
959 relend = qend - self.qstart + 1
960
961 return self.backbone[relstart : relend]
962
964 """
965 @return: the torsion angles of the fragment at the specified subregion.
966 @rtype: list
967 """
968
969 self._check_range(qstart, qend)
970
971 relstart = qstart - self.qstart
972 relend = qend - self.qstart + 1
973
974 return self.torsion[relstart : relend]
975
984
996
997 - def chain_at(self, source, qstart, qend):
1005
1007 """
1008 @type other: L{Assignment}
1009 @return: target positions, covered by both C{self} and C{other}
1010 @rtype: set of int
1011 """
1012
1013 qranks = set(range(self.qstart, self.qend + 1))
1014 sranks = set(range(other.qstart, other.qend + 1))
1015
1016 return qranks.intersection(sranks)
1017
def rmsd_to(self, other, min_overlap=5):
    """
    Compute the CA RMSD between C{self} and C{other} over the target
    region they both cover.

    @param other: another fragment
    @type other: L{Assignment}
    @param min_overlap: require at least that number of overlapping residues
                        (return None if not satisfied)
    @type min_overlap: int

    @return: the RMSD, or None if the overlap is too short or either
             fragment yields no coordinates for the common region
    @rtype: float
    """
    shared = self.overlap(other)

    if len(shared) < min_overlap:
        return None

    first, last = min(shared), max(shared)

    mine = self.backbone_at(first, last)
    theirs = other.backbone_at(first, last)

    if len(mine) == 0 or len(theirs) == 0:
        return None

    return csb.bio.utils.rmsd(mine, theirs)
1044
1045 - def nrmsd_to(self, other, min_overlap=5):
1060
def mda_to(self, other, min_overlap=5):
    """
    @return: the largest absolute phi/psi difference between C{self} and
             C{other} over the target region they both cover. The phi
             angle at the first common position and the psi angle at the
             last common position are left out of the comparison. None is
             returned if the overlap is too short to compare.

    @param other: another fragment
    @type other: L{Assignment}
    @param min_overlap: require at least that number of overlapping residues
                        (return None if not satisfied)
    @type min_overlap: int

    @rtype: float

    @note: angle differences are plain subtractions; no periodic
           wrap-around is applied. NOTE(review): confirm this is intended.
    """
    common = self.overlap(other)

    if not common or len(common) < min_overlap:
        return None

    qstart, qend = min(common), max(common)

    q = self.torsion_at(qstart, qend)
    s = other.torsion_at(qstart, qend)

    if len(q) == 0 or len(s) == 0:
        return None

    # zip() is a lazy iterator on Python 3 and cannot be sliced directly
    pairs = list(zip(q, s))

    # with a single position both slices below would be empty
    if len(pairs) < 2:
        return None

    maxphi = max(numpy.abs(i.phi - j.phi) for i, j in pairs[1:])
    maxpsi = max(numpy.abs(i.psi - j.psi) for i, j in pairs[:-1])

    return max(maxphi, maxpsi)
1080
def to_rosetta(self, source, qstart=None, qend=None, weight=None):
    """
    Format a region of this fragment as text, one line per residue. Each
    line holds the source accession, the upper-cased chain ID, the source
    position, the residue type, the constant secondary structure code
    'L', the phi, psi and omega angles, and the weight.

    @deprecated: this method will be deleted soon. Use
                 L{csb.bio.fragments.rosetta.OutputBuilder} instead.

    @param source: source structure; C{source.compute_torsion()} is
                   invoked on it
    @param qstart: first target rank to export; any false value falls
                   back to C{self.qstart}
    @param qend: last target rank to export; any false value falls back
                 to C{self.qend}
    @param weight: value of the last column; None falls back to
                   C{self.probability}

    @return: everything written to the in-memory stream
    """
    buffer = csb.io.MemoryStream()

    if weight is None:
        weight = self.probability
    qstart = qstart or self.qstart
    qend = qend or self.qend

    source.compute_torsion()
    region = self.chain_at(source, qstart, qend)

    for offset, residue in enumerate(region.residues):

        accession = self.source_id[:4]
        chain_id = self.source_id[4].upper()

        # translate the target rank into a position in the source
        position = qstart - self.qstart + self.start + offset
        residue_type = residue.type
        sec_structure = 'L'

        # missing (or zero) angles are written as 0
        phi = residue.torsion.phi or 0
        psi = residue.torsion.psi or 0
        omega = residue.torsion.omega or 0

        record = ' {0:4} {1:1} {2:>5} {3!s:1} {4!s:1} {5:>8.3f} {6:>8.3f} {7:>8.3f} {8:>8.3f}\n'.format(
            accession, chain_id, position, residue_type, sec_structure, phi, psi, omega, weight)
        buffer.write(record)

    return buffer.getvalue()
1117
1120
1123
1126
1128 """
1129 Provides clustering/filtering of the fragments, covering a common residue
1130 in the target. Clustering is done via iterative shrinking of the cluster.
1131 At each iteration, node rejection (deletion) is attempted for each node. The
1132 node rejection, causing the most significant drop in the average pairwise
1133 distance (RMSD) in the cluster, is retained. This procedure is repeated
1134 until: 1) the average pairwise RMSD drops below the C{threshold} (converged),
1135 2) the cluster gets exhausted or 3) node rejection no longer
1136 causes a drop in the average distance (not converging).
1137
1138 @param items: cluster members
1139 @type items: iterable of L{ClusterNode}s
1140 @param threshold: RMSD threshold; continue shrinking until the mean distance
1141 drops below this value (default=1.5)
1142 @type threshold: float
1143 @param connectedness: use only nodes which are connected to at least c% of all
1144 initial nodes (default=0.5, that means 50%)
1145 @type connectedness: float
1146 """
1147
1148 MIN_LENGTH = 6
1149
1150 - def __init__(self, items, threshold=1.5, connectedness=0.5):
1179
1180 @property
1182 return len(self._items)
1183
1184 @property
1186 return tuple(self._items)
1187
1188 @property
1190 return [i.fragment for i in self._items]
1191
1192 @property
1194 return self._threshold
1195 @threshold.setter
1197 self._threshold = float(value)
1198
1199 @property
1201 return self._connectedness
1202
1204
1205 d = []
1206
1207 for i in self._matrix:
1208 if skip is i:
1209 continue
1210
1211 for j in self._matrix[i]:
1212 if skip is not j:
1213 d.append(self._matrix[i][j])
1214
1215 return d
1216
1218
1219 if j in self._matrix[i]:
1220 return self._matrix[i][j]
1221 else:
1222 return None
1223
def mean(self, skip=None):
    """
    Average the pairwise distances currently held by the cluster.

    @param skip: forwarded to C{self._distances}; the node to leave out
                 when collecting the distances (default=None)

    @return: the current mean distance in the cluster
    @rtype: float

    @raise ClusterExhaustedError: if there are no distances to average
    """
    distances = self._distances(skip=skip)

    if len(distances) < 1:
        raise ClusterExhaustedError()

    return numpy.mean(distances)
1236
1238 """
1239 @return: the current representative fragment
1240 @rtype: L{ClusterRep}
1241
1242 @note: the cluster rep is the node with the lowest average distance
1243 to all other nodes. If a fixed fragment exists, structurally similar
1244 to the rep, but longer, this fragment may be suggested as an alternative
1245 (see also L{ClusterRep}).
1246 """
1247
1248 alt = None
1249 cen = None
1250 avg = None
1251
1252 for i in self._matrix:
1253
1254 curravg = numpy.mean(list(self._matrix[i].values()))
1255
1256 if avg is None or curravg < avg:
1257 avg = curravg
1258 cen = i
1259 elif curravg == avg:
1260 if i.fragment.length > cen.fragment.length:
1261 cen = i
1262
1263 d = self._distances()
1264 mean = numpy.mean(d)
1265 cons = sum(1.0 for i in d if i <= self.threshold) / len(d)
1266
1267 for i in self._matrix:
1268 if i is not cen and i.fixed and i.fragment.length > cen.fragment.length:
1269 distance = self._distance(i, cen)
1270 if distance is not None and distance < 0.5 * self.threshold:
1271 if alt is None or alt.fragment.length < i.fragment.length:
1272 alt = i
1273
1274 return ClusterRep(cen, mean, cons, len(self._matrix[cen]), alternative=alt,
1275 rejections=(self._initcount - self.count))
1276
1278 """
1279 Remove C{item} from the cluster.
1280
1281 @type item: L{ClusterNode}
1282 @raise ClusterExhaustedError: if this is the last remaining item
1283 """
1284
1285 if self.count == 1:
1286 raise ClusterExhaustedError()
1287
1288 assert not item.fixed
1289
1290 for i in self._matrix:
1291 if item in self._matrix[i]:
1292 del self._matrix[i][item]
1293
1294 del self._matrix[item]
1295 self._items.remove(item)
1296
1298 """
1299 Shrink the cluster by a single node.
1300
1301 @return: True on successful shrink, False otherwise (e.g. if
1302 already converged)
1303 @rtype: bool
1304 @raise ClusterExhaustedError: if exhausted
1305 @raise ClusterDivergingError: if not converging
1306 """
1307
1308 mean = self.mean()
1309 if mean <= self.threshold or self.count == 1:
1310 return False
1311
1312 m = {}
1313
1314 for i in self._matrix:
1315 if not i.fixed:
1316 newmean = self.mean(skip=i)
1317 m[newmean] = i
1318
1319 if len(m) == 0:
1320 raise ClusterExhaustedError()
1321
1322 newmean = min(m)
1323
1324 if newmean > mean:
1325 raise ClusterDivergingError()
1326 elif newmean < mean:
1327 junk = m[newmean]
1328 self.reject(junk)
1329 return True
1330 else:
1331 return False
1332
def shrink(self, minitems=2):
    """
    Start automatic shrinking: call C{self.shrinkone()} repeatedly until
    it reports that no further shrink took place.

    @param minitems: absolute minimum of the number of nodes in the cluster
    @type minitems: int

    @return: cluster's representative, as returned by C{self.centroid()}
    @rtype: L{ClusterRep}

    @raise ClusterExhaustedError: if the cluster does not hold more than
                                  C{minitems} nodes to begin with, or if a
                                  successful shrink step leaves it with
                                  C{minitems} nodes or fewer
    """
    # refuse to start on a cluster which is already too small
    if self.count <= minitems:
        raise ClusterExhaustedError()

    while self.shrinkone():
        # a node has just been rejected; make sure enough of them are left
        if self.count <= minitems:
            raise ClusterExhaustedError()

    return self.centroid()
1357
1359 """
1360 Cluster node.
1361
1362 @param fragment: fragment
1363 @type fragment: L{Assignment}
1364 @param distance: distance metric (a L{Metrics} member, default is RMSD)
1365 @type distance: str
1366 @param fixed: mark this node as fixed (cannot be rejected)
1367 @type fixed: bool
1368 """
1369
1379
1381 """
1382 @return: the distance between self and another node
1383 @type other: L{ClusterNode}
1384 @rtype: float
1385 """
1386 return self._distance(other.fragment)
1387
1389 """
1390 Cluster's representative (centroid) node. This object carries the
1391 result of shrinking itself.
1392
1393 @param centroid: rep node
1394 @type centroid: L{ClusterNode}
1395 @param mean: current mean distance in the cluster
1396 @type mean: float
1397 @param consistency: fraction (0..1) of pairwise distances at or below the RMSD C{threshold}
1398 @type consistency: float
1399 @param count: current number of nodes in the cluster
1400 @type count: int
1401 @param rejections: total number of rejections
1402 @type rejections: int
1403 @param alternative: suggested cluster rep alternative (e.g. structurally
1404 similar to the centroid, but longer)
1405 @type alternative: L{ClusterNode}
1406 """
1407
1408 - def __init__(self, centroid, mean, consistency, count, rejections=0, alternative=None):
1421
1422 @property
1424 """
1425 Confidence of assignment: log10(count) * consistency
1426 """
1427 if self.count <= 0 or self.count is None or self.consistency is None:
1428 return 0
1429 else:
1430 return numpy.log10(self.count) * self.consistency
1431
1432 @property
1434 return self._centroid
1435
1436 @property
1438 return self._alternative
1439
1440 @property
1442 return self._alternative is not None
1443
1444 @property
1447
1448 @property
1450 return self._consistency
1451
1452 @property
1455
1456 @property
1458 return self._rejections
1459
1461 """
1462 If an alternative is available, swap the centroid and the alternative.
1463 """
1464
1465 if self._alternative is not None:
1466
1467 centroid = self._centroid
1468 self._centroid = self._alternative
1469 self._alternative = centroid
1470
1476
1478
1479 @staticmethod
1493
1494 - def __init__(self, center, qstart, qend):
1504
1505 @property
1508
1509 @property
1512
1513 @property
1516
1517 @property
1520
1521 @property
1524
1525 @property
1528
1529 - def chain(self, source):
1531
1534
1590
1593
1594 - def __init__(self, rank, confidence=None, count=None, confident=True, rep=None):
1601
1603 """
1604 Simplifies the construction of fragment libraries.
1605 """
1606
1610
1627
1629 """
1630 Build a fixed-length fragment library from a list of
1631 variable-length L{Assignment}s.
1632
1633 @param fragments: source fragments
1634 @type fragments: iterable of L{RosettaFragment}s
1635 @param window: fixed-length fragment size (for classic Rosetta: choose 9)
1636 @type window: int
1637
1638 @return: fixed-length fragment library
1639 @rtype: L{RosettaFragmentMap}
1640 """
1641
1642 frags = []
1643
1644 for f in fragments:
1645 for qs in range(f.qstart, f.qend - window + 1):
1646 frags.append(f.subregion(qs, qs + window - 1))
1647
1648 return self.rosetta.RosettaFragmentMap(frags)
1649
def make_combined(self, target, filling, threshold=0.5, callback=None):
    """
    Complement C{target}'s assignments with C{filling} (e.g. rosetta
    fragments). Every residue is filtered; residues without assignments,
    without a cluster rep, or with a rep confidence below C{threshold}
    are marked as unconfident in the resulting library, which is then
    complemented with each fragment in C{filling}.

    @param target: target protein
    @type target: L{Target}
    @param filling: additional fragments to place in the low-conf regions
    @type filling: L{RosettaFragmentMap} or iterable of L{RosettaFragment}
    @param threshold: confidence threshold
    @type threshold: float
    @param callback: optional hook, invoked with a L{ResidueEventInfo} for
                     every residue which is not confidently covered
    @type callback: callable

    @return: complemented fragment library
    @rtype: L{RosettaFragmentMap}
    """

    def emit(*details):
        # wrap the event details and hand them over to the caller's hook
        callback(ResidueEventInfo(*details))

    library = self.make_fragset(target)
    confident = set()

    for residue in target.residues:

        # nothing assigned here at all
        if residue.assignments.length == 0:
            if callback:
                emit(residue.native.rank, None, 0, False)
            continue

        rep = residue.filter()

        # filtering produced no representative
        if rep is None:
            if callback:
                emit(residue.native.rank, 0, 0, False)
            continue

        if rep.confidence >= threshold:
            confident.add(residue.native.rank)
        elif callback:
            emit(residue.native.rank, rep.confidence, rep.count, False)

    for residue in target.residues:
        if residue.native.rank not in confident:
            library.mark_unconfident(residue.native.rank)

    for fragment in filling:
        library.complement(fragment)

    return library
1697
1734
1735 - def mix(self, *fragsets):
1755
1758
1760
1761 FACTORY = None
1762 DSN = None
1763
def __init__(self, factory=None, dsn=None):
    """
    Remember how to connect; nothing is opened here.

    @param factory: callable used to create the connection; any false
                    value falls back to the class-level C{FACTORY}
    @param dsn: value handed to the factory when connecting; any false
                value falls back to the class-level C{DSN}
    """
    self.factory = factory if factory else self.__class__.FACTORY
    self.cs = dsn if dsn else self.__class__.DSN

    # no connection and no cursor until they are explicitly opened
    self.connection = None
    self.cursor = None
1770
1772
1773 self.connection = self.factory(self.cs)
1774 try:
1775 self.cursor = self.connection.cursor()
1776 except:
1777 self.connection.close()
1778 raise
1779 return self
1780
1782 try:
1783 if not self.cursor.closed:
1784 self.cursor.close()
1785 finally:
1786 if not self.connection.closed:
1787 self.connection.close()
1788
1809
1810 @staticmethod
def connection_string(database='FragmentBenchmarks', host='', username='', password=''):
    """
    Compose a connection string made of space-separated C{key=value} pairs.

    @param database: database name (always included, as C{dbname})
    @type database: str
    @param host: server host; left out if empty
    @type host: str
    @param username: user name; left out if empty
    @type username: str
    @param password: password; included only together with C{username}
    @type password: str

    @return: the assembled connection string
    @rtype: str
    """
    parts = ['dbname={0}'.format(database)]

    if host:
        parts.append('host={0}'.format(host))

    if username:
        # credentials always travel as a pair, even if the password is empty
        parts.extend(['user={0}'.format(username), 'password={0}'.format(password)])

    return ' '.join(parts)
1822
1829
1836
1843
1844 - def scores(self, benchmark_id, type):
1850
1857
1866
def structure(self, accession, chain=None):
    """
    Locate and parse the structure file of a given entry. The file is
    looked up by C{accession} first; if that fails and a C{chain} is
    given, a second lookup is made for C{accession + chain}.

    @param accession: entry accession to look for
    @type accession: str
    @param chain: optional chain ID, used for the fallback lookup
    @type chain: str

    @return: whatever C{parse_structure()} of the configured parser returns

    @raise IOError: if no matching file is found
    """
    located = self._find(accession, self._pdb)

    if chain and not located:
        # second chance: the file may be named after accession + chain
        located = self._find(accession + chain, self._pdb)

    if located:
        return self._parser(located).parse_structure()

    raise IOError('{0} not found here: {1}'.format(accession, self._pdb))
1878
1880
1881 info = self.target_details(target_id)
1882 if not info:
1883 raise ValueError('No such Target ID in the database: {0}'.format(target_id))
1884 row = info[0]
1885
1886 id = row["Accession"]
1887 length = float(row["Length"])
1888 overlap = float(row["MaxOverlap"]) / (length or 1.)
1889
1890 native = self.structure(id[:4], id[4]).chains[id[4]]
1891 segments = self.target_segments(target_id)
1892 target = self._factory.target(id, length, native.residues, overlap, segments)
1893
1894 source = None
1895
1896 for row in self.assignments(target_id, type):
1897
1898 src_accession = row['Source'][:4]
1899 src_chain = row['Source'][4]
1900
1901 if source is None or source.accession != src_accession:
1902 try:
1903 source = self.structure(src_accession, src_chain)
1904 except (IOError, ValueError) as ex:
1905 target.errors.append(ex)
1906 continue
1907
1908 if src_chain == '_':
1909 frag_chain = source.first_chain
1910 else:
1911 frag_chain = source.chains[src_chain]
1912 if not frag_chain.has_torsion:
1913 frag_chain.compute_torsion()
1914
1915 fragment = self._factory.assignment(
1916 source=frag_chain,
1917 start=row['SourceStart'],
1918 end=row['SourceEnd'],
1919 id=row['FragmentName'],
1920 qstart=row['Start'],
1921 qend=row['End'],
1922 probability=row['Probability'],
1923 score=row['Score'],
1924 neff=row['Neff'],
1925 rmsd=row['RMSD'],
1926 tm_score=row['TMScore'],
1927 segment=row['SegmentStart'],
1928 internal_id=row['InternalID'])
1929
1930 target.assign(fragment)
1931
1932 return target
1933