
Source Code for Package csb.bio.fragments

"""
APIs for working with protein structure fragments and libraries.

This package contains the nuts and bolts of HHfrag. Everything here revolves
around the L{Target} class, which describes a protein structure prediction
target. One typically assigns fragments (L{Assignment}s) to the target and then
builds a fragment library with L{RosettaFragsetFactory}.

@note: Internal or legacy objects are intentionally left undocumented.
       This typically indicates experimental code.
"""

import os
import numpy

import csb.io
import csb.core
import csb.bio.utils
import csb.bio.structure
import csb.bio.sequence
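
# ---------------------------------------------------------------------------
# Usage sketch (added for illustration, not part of the original module): a
# minimal, hypothetical workflow using the public classes defined below. The
# target ID, the sequence and the Assignment parameters are made up, and
# "source" stands for any torsion-annotated csb.bio.structure.Chain.
#
#     target = Target.from_sequence('1abcA', 'MKVLITGAGSGIGLA')
#     frag = Assignment(source, start=10, end=18, id='frag1',
#                       qstart=3, qend=11, probability=0.9, rmsd=0.8)
#     target.assign(frag)
#     fragset = RosettaFragsetFactory().make_fragset(target)
# ---------------------------------------------------------------------------
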
class FragmentTypes(object):

    ISites = 'IS'
    HMMFragments = 'HH'
    HHThread = 'TH'
    HHfrag = HHThread
    Rosetta = 'NN'

class Metrics(object):

    RMSD = 'rmsd_to'
    NORMALIZED_RMSD = 'nrmsd_to'
    MDA = 'mda_to'

RANDOM_RMSD = { 5: 1.8749005857255376, 6: 2.4314283686276261, 7: 2.9021135267789608, 8: 3.2477716200172715, 9: 3.5469606556031708, 10: 3.8295465524456329,
               11: 4.1343107114131783, 12: 4.3761697929053014, 13: 4.6707299668248394, 14: 4.9379016881069733, 15: 5.1809028645084911, 16: 5.4146957142595662,
               17: 5.7135948448156988, 18: 5.9597935432566782, 19: 6.1337340535741962, 20: 6.3962825155503271, 21: 6.6107937773415166, 22: 6.8099096274123401,
               23: 7.0435583846849639, 24: 7.2160956482560970, 25: 7.4547896324594962, 26: 7.6431870072434211, 27: 7.8727812194173836, 28: 8.0727393298443637,
               29: 8.2551450998965326, 30: 8.4413583511786587, 31: 8.5958719774122052, 32: 8.7730435506242408, 33: 8.9970648837941649, 34: 9.1566521405105163,
               35: 9.2828620878454728, 36: 9.4525824357923405, 37: 9.6322126445253300, 38: 9.7851684750961176, 39: 9.9891454649821476, 40: 10.124373939352028,
               41: 10.284348528344765, 42: 10.390457305096271, 43: 10.565792044674239, 44: 10.676532740033737, 45: 10.789537132283652, 46: 11.004475543757550,
               47: 11.064541647783571, 48: 11.231219875286985, 49: 11.319222637391441, 50: 11.485478165340824, 51: 11.607522494435521, 52: 11.700268836069840,
               53: 11.831245255954073, 54: 11.918975893263905 }

class FragmentMatch(object):
    """
    Base class, representing a match between a fragment and its target.
    """

    def __init__(self, id, qstart, qend, probability, rmsd, tm_score, qlength):

        self._id = id
        self._qstart = qstart
        self._qend = qend
        self._probability = probability
        self._rmsd = rmsd
        self._tm_score = tm_score
        self._qlength = qlength

    @property
    def id(self):
        return self._id

    @property
    def qstart(self):
        return self._qstart

    @property
    def qend(self):
        return self._qend

    @property
    def qlength(self):
        return self._qlength

    @property
    def rmsd(self):
        return self._rmsd

    @property
    def tm_score(self):
        return self._tm_score

    @property
    def probability(self):
        return self._probability

    @property
    def length(self):
        return self.qend - self.qstart + 1

    @property
    def source_id(self):
        raise NotImplementedError()

    @property
    def start(self):
        raise NotImplementedError()

    @property
    def end(self):
        raise NotImplementedError()

class PredictionContainer(object):

    def __init__(self, target, isites_prediction, hmm_prediction, combined_prediction):

        self.target = target

        self.isites = isites_prediction
        self.hmm = hmm_prediction
        self.combined = combined_prediction

class Prediction(object):

    def __init__(self, alignment, coordinates):

        self.alignment = alignment
        self.coordinates = coordinates

class TorsionAnglesPredictor(object):
    """
    Fragment-based phi/psi angles predictor.

    @param target: target protein, containing fragment assignments
    @type target: L{Target}
    @param threshold: RMSD distance threshold for L{FragmentCluster}-based filtering
    @type threshold: float
    @param extend: pick alternative, longer cluster reps, if possible
    @type extend: bool
    @param init: populate all L{FragmentCluster}s on instantiation. If False, this step
                 will be performed on demand (the first time C{predictor.compute()} is invoked)
    @type init: bool

    @note: if C{init} is False, the first call to C{predictor.compute()} might take a long
           time. Subsequent calls will be very fast.
    """

    def __init__(self, target, threshold=1.5, extend=False, init=False):

        if not isinstance(target, Target):
            raise TypeError(target)
        if target.matches.length == 0:
            raise ValueError('This target has no fragment assignments')

        self._target = target
        self._threshold = float(threshold)
        self._extend = bool(extend)

        self._initialized = False
        self._reps = {}

        if init:
            self.init()

    @property
    def target(self):
        return self._target

    @property
    def threshold(self):
        return self._threshold

    @property
    def extend(self):
        return self._extend

    def init(self):
        """
        Compute and cache all L{FragmentCluster}s.
        """

        self._reps = {}

        for residue in self.target.residues:
            rep = residue.filter(threshold=self.threshold, extend=self.extend)

            if rep is not None:
                self._reps[residue.native.rank] = rep

        self._initialized = True

    def _residue(self, rank):

        for r in self._target.residues:
            if r.native.rank == rank:
                return r

        raise ValueError('Rank {0} is out of range'.format(rank))

    def compute_single(self, rank):
        """
        Extract torsion angles from the L{ClusterRep} at residue C{#rank}.

        @param rank: target residue rank
        @type rank: int

        @rtype: L{TorsionPredictionInfo}
        """

        residue = self._residue(rank)
        rep = residue.filter(threshold=self.threshold, extend=self.extend)

        if rep is None:
            return None

        else:
            fragment = rep.centroid
            torsion = fragment.torsion_at(rank, rank)[0]

            return TorsionPredictionInfo(rank, rep.confidence, torsion, primary=True)

    def compute(self, rank):
        """
        Extract torsion angles from all L{ClusterRep}s covering residue C{#rank}.

        @param rank: target residue rank
        @type rank: int

        @return: a tuple of L{TorsionPredictionInfo}, sorted by confidence
        @rtype: tuple
        """

        if not self._initialized:
            self.init()

        residue = self._residue(rank)
        prediction = []

        for rep in self._reps.values():

            if rep.centroid.qstart <= residue.native.rank <= rep.centroid.qend:

                fragment = rep.centroid
                torsion = fragment.torsion_at(rank, rank)[0]
                info = TorsionPredictionInfo(rank, rep.confidence, torsion)

                if rep is self._reps.get(rank, None):
                    info.primary = True

                prediction.append(info)

        prediction.sort(reverse=True)
        return tuple(prediction)

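
# ---------------------------------------------------------------------------
# Usage sketch (illustrative, hypothetical objects): predicting phi/psi angles
# once a Target has fragment assignments; "target" is assumed to be a
# populated Target instance.
#
#     predictor = TorsionAnglesPredictor(target, threshold=1.5, init=True)
#     for info in predictor.compute(rank=25):
#         print(info.rank, info.confidence, info.torsion.phi, info.torsion.psi)
# ---------------------------------------------------------------------------
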
class TorsionPredictionInfo(object):
    """
    Struct container for a single torsion angle prediction.

    @param rank: target residue rank
    @type rank: int
    @param confidence: confidence of prediction
    @type confidence: float
    @param torsion: assigned phi/psi/omega angles
    @type torsion: L{TorsionAngles}
    @param primary: if True, designates that the assigned angles are extracted
                    from the L{ClusterRep} at residue C{#rank}; otherwise, the
                    angles come from another, overlapping L{ClusterRep}
    @type primary: bool
    """

    def __init__(self, rank, confidence, torsion, primary=False):

        self.rank = rank
        self.confidence = confidence
        self.torsion = torsion
        self.primary = primary

    def as_tuple(self):
        """
        @return: this prediction, converted to a tuple: (confidence, phi, psi, omega)
        @rtype: tuple
        """
        return tuple([self.confidence, self.torsion.phi, self.torsion.psi, self.torsion.omega])

    def __str__(self):
        return '<TorsionPredictionInfo: {0.confidence:6.3f} at #{0.rank}>'.format(self)

    def __lt__(self, other):
        return self.confidence < other.confidence

class AssignmentFactory(object):

    def target(self, *a, **k):
        return Target(*a, **k)

    def residue(self, *a, **k):
        return TargetResidue(*a, **k)

    def assignment(self, *a, **k):
        return Assignment(*a, **k)

class Target(csb.core.AbstractNIContainer):
    """
    Represents a protein structure prediction target.

    @param id: target sequence ID, in PDB accnC format
    @type id: str
    @param length: total target sequence length
    @type length: int
    @param residues: a list, containing the target's residues. See also
                     L{Target.from_sequence}
    @type residues: iterable of L{csb.bio.structure.ProteinResidue}s
    """

    def __init__(self, id, length, residues, overlap=None, segments=None, factory=AssignmentFactory()):

        self._id = id
        self._accession = id[:-1]
        self._chain_id = id[-1]
        self._length = length
        self._overlap = overlap
        self._factory = factory

        self._assignments = csb.core.ReadOnlyCollectionContainer(type=Assignment)
        self._errors = csb.core.CollectionContainer()

        resi = [factory.residue(native) for native in residues]
        self._residues = csb.core.ReadOnlyCollectionContainer(items=resi,
                                            type=TargetResidue, start_index=1)

        if segments is not None:
            segments = dict([(s.start, s) for s in segments])
        self._segments = csb.core.ReadOnlyDictionaryContainer(items=segments)

    @staticmethod
    def from_sequence(id, sequence):
        """
        Factory, which builds L{Target} objects from a bare sequence.

        @param sequence: target's sequence
        @type sequence: L{csb.bio.sequence.AbstractSequence}, str or iterable

        @rtype: L{Target}
        """

        if isinstance(sequence, csb.bio.sequence.Sequence):
            sequence = sequence.sequence

        residues = []

        for rn, aa in enumerate(sequence, start=1):
            residue = csb.bio.structure.ProteinResidue(rank=rn, type=aa)
            residues.append(residue)

        return Target(id, len(residues), residues)

    @staticmethod
    def from_profile(hmm):
        """
        Factory, which builds L{Target} objects from an HMM profile.

        @param hmm: target's HMM
        @type hmm: L{csb.bio.hmm.ProfileHMM}

        @rtype: L{Target}
        """

        residues = [r.clone() for r in hmm.residues]
        return Target(hmm.id, hmm.layers.length, residues)

    @staticmethod
    def deserialize(pickle):

        with open(pickle, 'rb') as stream:      # pickled data must be read in binary mode
            return csb.io.Pickle.load(stream)

    @property
    def _children(self):
        return self._residues

    @property
    def errors(self):
        return self._errors

    @property
    def id(self):
        return self._id

    @property
    def accession(self):
        return self._accession

    @property
    def chain_id(self):
        return self._chain_id

    @property
    def max_overlap(self):
        return self._overlap

    @property
    def length(self):
        return self._length

    @property
    def sequence(self):
        return ''.join(str(r.native.type) for r in self)

    @property
    def matches(self):
        return self._assignments

    @property
    def residues(self):
        return self._residues

    @property
    def segments(self):
        return self._segments

    def assign(self, fragment):
        """
        Add a new fragment match.

        @param fragment: fragment to assign
        @type fragment: L{Assignment}
        """

        if not 1 <= fragment.qstart <= fragment.qend <= len(self._residues):
            raise ValueError("Fragment out of range")

        self._assignments._append_item(fragment)

        for rank in range(fragment.qstart, fragment.qend + 1):
            ai = ResidueAssignmentInfo(fragment, rank)
            self._residues[rank].assign(ai)

        if fragment.segment is not None:
            try:
                self._segments[fragment.segment].assign(fragment)
            except KeyError:
                raise ValueError("Undefined segment starting at {0}".format(fragment.segment))

    def assignall(self, fragments):
        """
        Assign multiple fragments at once.

        @type fragments: iterable of L{Assignment}s
        """
        for frag in fragments:
            self.assign(frag)

    def filter(self, threshold=1.5, extend=False):
        """
        Filter the current fragment map using a L{FragmentCluster}.

        @param threshold: cluster RMSD threshold (see L{FragmentCluster})
        @type threshold: float
        @param extend: pick extended alternatives where possible (default=False)
        @type extend: bool

        @return: a new target, containing only cluster centroids/reps
        @rtype: L{Target}
        """

        target = self.clone()

        for residue in self.residues:
            rep = residue.filter(threshold=threshold, extend=extend)

            if rep is not None:
                target.assign(rep.centroid)

        return target

    def clone(self):
        """
        @return: a deep copy of the target
        @rtype: L{Target}
        """

        segments = [self.segments[start] for start in self.segments]
        segments = [TargetSegment(s.start, s.end, s.count) for s in segments]

        target = self._factory.target(self.id, self.length, [r.native for r in self.residues],
                                      overlap=self._overlap, segments=segments)

        return target

class TargetResidue(object):
    """
    Wrapper around L{Target}'s native residues. Decorates them with additional,
    fragment-related methods.

    @type native_residue: L{csb.bio.structure.ProteinResidue}
    """

    def __init__(self, native_residue):

        self._type = native_residue.type
        self._native = native_residue.clone()
        self._assignments = csb.core.ReadOnlyCollectionContainer(type=ResidueAssignmentInfo)

    @property
    def type(self):
        return self._type

    @property
    def native(self):
        return self._native

    @property
    def assignments(self):
        return self._assignments

    def assign(self, assignment_info):
        self._assignments._append_item(assignment_info)

    def verybest(self):
        """
        @return: the fragment with the lowest RMSD at this position in the L{Target}
        @rtype: L{Assignment}
        """

        best = None

        for ai in self.assignments:
            a = ai.fragment
            if a.length < FragmentCluster.MIN_LENGTH:
                continue
            if best is None or a.rmsd < best.rmsd:
                best = a
            elif a.rmsd == best.rmsd and a.length > best.length:
                best = a

        return best

    def filter(self, method=Metrics.RMSD, threshold=1.5, extend=False):
        """
        Filter all fragments covering this position in the L{Target} using a
        L{FragmentCluster}.

        @param method: one of the L{Metrics} members (default=L{Metrics.RMSD})
        @type method: str
        @param threshold: cluster RMSD threshold (see L{FragmentCluster})
        @type threshold: float
        @param extend: pick an extended alternative where possible (default=False)
        @type extend: bool

        @return: the cluster's representative (if converged) or None
        @rtype: L{ClusterRep} or None
        """

        try:
            nodes = []
            for ai in self.assignments:
                if ai.fragment.probability > 0.7 and ai.fragment.length >= FragmentCluster.MIN_LENGTH:
                    node = ClusterNode(ai.fragment, distance=method, fixed=extend)
                else:
                    node = ClusterNode(ai.fragment, distance=method, fixed=False)
                nodes.append(node)
            cluster = FragmentCluster(nodes, threshold=threshold)

            center = cluster.shrink(minitems=0)
            if center.has_alternative:
                center.exchange()
            return center

        except (ClusterExhaustedError, ClusterDivergingError):
            return None

    def longest(self):
        """
        @return: the longest fragment covering the current position
        @rtype: L{Assignment}
        """
        best = None

        for q in self.assignments:
            if best is None or (q.fragment.length > best.length):
                best = q.fragment

        return best

    def precision(self, threshold=1.5):
        """
        @return: the residue-wise precision of the fragment library at the
                 current position (percentage)

        @param threshold: true-positive RMSD cutoff (default=1.5)
        @type threshold: float
        @rtype: float
        """

        if self.assignments.length < 1:
            return None
        else:
            positive = [a for a in self.assignments if a.fragment.rmsd <= threshold]
            pos = len(positive) * 100.0 / self.assignments.length

            return pos

class TargetSegment(object):

    def __init__(self, start, end, count):

        self._start = start
        self._end = end
        self._count = count

        self._assignments = csb.core.ReadOnlyCollectionContainer(type=Assignment)

    @property
    def count(self):
        return self._count

    @property
    def start(self):
        return self._start

    @property
    def end(self):
        return self._end

    @property
    def length(self):
        return (self._end - self._start + 1)

    @property
    def assignments(self):
        return self._assignments

    def assign(self, fragment):
        if fragment.segment != self.start:
            raise ValueError('Segment origin mismatch: {0} vs {1}'.format(fragment.segment, self.start))
        else:
            self._assignments._append_item(fragment)

    def verybest(self):

        best = None

        for a in self.assignments:
            if a.length < FragmentCluster.MIN_LENGTH:
                continue
            if best is None or a.rmsd < best.rmsd:
                best = a
            elif a.rmsd == best.rmsd and a.length > best.length:
                best = a

        return best

    def best(self, method=Metrics.RMSD):

        cluster = None
        try:
            # FragmentCluster expects ClusterNode items and takes no "method" argument;
            # wrap the assignments and pass the metric to the nodes instead
            nodes = [ClusterNode(a, distance=method) for a in self.assignments]
            cluster = FragmentCluster(nodes, threshold=1.5, connectedness=0.5)
            centroid = cluster.shrink(minitems=1)
            return centroid

        except ClusterExhaustedError:
            return None
        finally:
            del cluster

    def longest(self):

        best = None

        for q in self.assignments:
            if best is None or (q.length > best.length):
                best = q

        return best

    def pairwise_rmsd(self, min_overlap=5):

        rmsds = []

        for q in self.assignments:
            for s in self.assignments:
                if q is not s:
                    r = q.rmsd_to(s, min_overlap)
                    if r is not None:
                        rmsds.append(r)
                else:
                    assert q.rmsd_to(s, 1) < 0.01

        return rmsds

    def pairwise_mda(self, min_overlap=5):

        mdas = []

        for q in self.assignments:
            for s in self.assignments:
                if q is not s:
                    m = q.mda_to(s, min_overlap)
                    if m is not None:
                        mdas.append(m)
        return mdas

    def pairwise_sa_rmsd(self, profiles='.', min_overlap=5):

        from csb.bio.hmm import RELATIVE_SA
        from csb.bio.io.hhpred import ScoreUnits, HHProfileParser

        def convert_sa(sa):
            return numpy.array([ RELATIVE_SA[i] for i in sa ])

        sources = {}
        scores = []

        for q in self.assignments:
            for s in self.assignments:

                if s.source_id not in sources:
                    hmm = HHProfileParser(os.path.join(profiles, s.source_id + '.hhm')).parse()
                    sources[s.source_id] = hmm.dssp_solvent

                if q is not s:

                    common = q.overlap(s)
                    if len(common) >= min_overlap:

                        qsa = q.solvent_at(sources[q.source_id], min(common), max(common))
                        ssa = s.solvent_at(sources[s.source_id], min(common), max(common))

                        if '-' in qsa + ssa:
                            continue

                        qsa = convert_sa(qsa)
                        ssa = convert_sa(ssa)
                        assert len(qsa) == len(ssa)
                        sa_rmsd = numpy.sqrt(numpy.sum((qsa - ssa) ** 2) / float(len(qsa)))

                        scores.append(sa_rmsd)
        return scores

    def pairwise_scores(self, profiles='.', min_overlap=5):

        from csb.bio.hmm import BACKGROUND
        back = numpy.sqrt(numpy.array(BACKGROUND))

        sources = {}
        scores = []

        for q in self.assignments:
            for s in self.assignments:

                if s.source_id not in sources:
                    # hmm = HHProfileParser(os.path.join(hmm_path, s.source_id + '.hhm')).parse(ScoreUnits.Probability)
                    sources[s.source_id] = csb.io.Pickle.load(open(os.path.join(profiles, s.source_id + '.pkl'), 'rb'))

                if q is not s:

                    common = q.overlap(s)
                    if len(common) >= min_overlap:

                        qprof = q.profile_at(sources[q.source_id], min(common), max(common))
                        sprof = s.profile_at(sources[s.source_id], min(common), max(common))

                        #score = qhmm.emission_similarity(shmm)
                        assert len(qprof) == len(sprof)
                        dots = [ numpy.dot(qprof[i] / back, sprof[i] / back) for i in range(len(qprof)) ]
                        score = numpy.log(numpy.prod(dots))
                        if score is not None:
                            scores.append(score)
        return scores

    def _entropy(self, data, binsize):

        binsize = float(binsize)
        bins = numpy.ceil(numpy.array(data) / binsize)

        hist = dict.fromkeys(bins, 0)
        for bin in bins:
            hist[bin] += (1.0 / len(bins))

        freq = numpy.array(list(hist.values()))     # list() is needed for Python 3 dict views
        return - numpy.sum(freq * numpy.log(freq))

    def rmsd_entropy(self, binsize=0.1):

        rmsds = self.pairwise_rmsd()
        return self._entropy(rmsds, binsize)

    def score_entropy(self, profiles='.', binsize=1):

        scores = self.pairwise_scores(profiles)
        return self._entropy(scores, binsize)

    def rmsd_consistency(self, threshold=1.5):

        rmsds = self.pairwise_rmsd()

        if len(rmsds) < 1:
            return None

        return sum([1 for i in rmsds if i <= threshold]) / float(len(rmsds))

    def sa_rmsd_consistency(self, threshold=0.4, profiles='.'):

        sa_rmsds = self.pairwise_sa_rmsd(profiles=profiles)

        if len(sa_rmsds) < 1:
            return None

        return sum([1 for i in sa_rmsds if i <= threshold]) / float(len(sa_rmsds))

    def true_positives(self, threshold=1.5):

        if self.assignments.length < 1:
            return None

        return sum([1 for i in self.assignments if i.rmsd <= threshold]) / float(self.assignments.length)

    def confidence(self):

        cons = self.rmsd_consistency()

        if cons is None:
            return 0
        else:
            return numpy.log10(self.count) * cons

class ResidueAssignmentInfo(object):

    def __init__(self, assignment, rank):

        if not assignment.qstart <= rank <= assignment.qend:
            raise ValueError('Rank {0} is not matched by this assignment'.format(rank))

        self._assignment = assignment
        self._rank = rank
        self._relrank = rank - assignment.qstart

    @property
    def c_alpha(self):
        return self._assignment.backbone[self._relrank]

    @property
    def fragment(self):
        return self._assignment

class Assignment(FragmentMatch):
    """
    Represents a match between a fragment and its target.

    @param source: source structure (must have torsion angles precomputed)
    @type source: L{csb.bio.structure.Chain}
    @param start: start position in C{source} (rank)
    @type start: int
    @param end: end position in C{source} (rank)
    @type end: int
    @param id: fragment ID
    @type id: str
    @param qstart: start position in target (rank)
    @type qstart: int
    @param qend: end position in target (rank)
    @type qend: int
    @param probability: probability of assignment
    @type probability: float
    @param rmsd: RMSD of the fragment, compared to the target's native structure
    @type rmsd: float
    """

    def __init__(self, source, start, end, id, qstart, qend, probability, rmsd, tm_score=None,
                 score=None, neff=None, segment=None, internal_id=None):

        assert source.has_torsion
        sub = source.subregion(start, end, clone=True)
        try:
            calpha = [r.atoms['CA'].vector.copy() for r in sub.residues]
        except csb.core.ItemNotFoundError:
            raise csb.bio.structure.Broken3DStructureError()
        torsion = [r.torsion.copy() for r in sub.residues]

        self._calpha = csb.core.ReadOnlyCollectionContainer(items=calpha, type=numpy.ndarray)
        self._torsion = torsion
        self._sequence = sub.sequence

        self._source_id = source.accession[:4] + source.id
        self._start = start
        self._end = end

        self._score = score
        self._neff = neff

        self._segment_start = segment
        self.internal_id = internal_id

        super(Assignment, self).__init__(id, qstart, qend, probability, rmsd, tm_score, None)

    @property
    def backbone(self):
        return self._calpha

    @property
    def sequence(self):
        return self._sequence

    @property
    def torsion(self):
        return self._torsion

    @property
    def source_id(self):
        return self._source_id

    @property
    def start(self):
        return self._start

    @property
    def end(self):
        return self._end

    @property
    def score(self):
        return self._score

    @property
    def neff(self):
        return self._neff

    @property
    def segment(self):
        return self._segment_start

    def transform(self, rotation, translation):
        """
        Apply a rotation/translation to the fragment's coordinates in place.
        """

        for ca in self.backbone:
            newca = numpy.dot(ca, numpy.transpose(rotation)) + translation
            for i in range(3):
                ca[i] = newca[i]

    def _check_range(self, qstart, qend):

        if not (self.qstart <= qstart <= qend <= self.qend):
            raise ValueError('Region {0}..{1} is out of range {2.qstart}..{2.qend}'.format(qstart, qend, self))

    def anchored_around(self, rank):
        """
        @return: True if the fragment is centered around position=C{rank}
        @rtype: bool
        """

        if self.qstart < rank < self.qend:
            if (rank - self.qstart + 1) > 0.4 * (self.qend - self.qstart + 1):
                return True

        return False

    def backbone_at(self, qstart, qend):
        """
        @return: the CA coordinates of the fragment at the specified subregion
        @rtype: list
        """

        self._check_range(qstart, qend)

        relstart = qstart - self.qstart
        relend = qend - self.qstart + 1

        return self.backbone[relstart : relend]

    def torsion_at(self, qstart, qend):
        """
        @return: the torsion angles of the fragment at the specified subregion
        @rtype: list
        """

        self._check_range(qstart, qend)

        relstart = qstart - self.qstart
        relend = qend - self.qstart + 1

        return self.torsion[relstart : relend]

    def solvent_at(self, sa_string, qstart, qend):

        self._check_range(qstart, qend)

        relstart = qstart - self.qstart
        relend = qend - self.qstart + 1

        return sa_string[relstart : relend]

    def profile_at(self, source, qstart, qend):

        self._check_range(qstart, qend)

        start = qstart - self.qstart + self.start
        end = qend - self.qstart + self.start

        if hasattr(source, 'subregion'):
            return source.subregion(start, end)
        else:
            return source[start - 1 : end]

    def chain_at(self, source, qstart, qend):

        self._check_range(qstart, qend)

        start = qstart - self.qstart + self.start
        end = qend - self.qstart + self.start

        return source.subregion(start, end)

    def overlap(self, other):
        """
        @type other: L{Assignment}
        @return: target positions, covered by both C{self} and C{other}
        @rtype: set of int
        """

        qranks = set(range(self.qstart, self.qend + 1))
        sranks = set(range(other.qstart, other.qend + 1))

        return qranks.intersection(sranks)

    def rmsd_to(self, other, min_overlap=5):
        """
        @return: the CA RMSD between C{self} and C{other}

        @param other: another fragment
        @type other: L{Assignment}
        @param min_overlap: require at least that many overlapping residues
                            (return None if not satisfied)
        @type min_overlap: int

        @rtype: float
        """

        common = self.overlap(other)

        if len(common) >= min_overlap:

            qstart, qend = min(common), max(common)

            q = self.backbone_at(qstart, qend)
            s = other.backbone_at(qstart, qend)

            if len(q) > 0 and len(s) > 0:
                return csb.bio.utils.rmsd(q, s)

        return None

    def nrmsd_to(self, other, min_overlap=5):

        common = self.overlap(other)

        if len(common) >= min_overlap:

            qstart, qend = min(common), max(common)

            q = self.backbone_at(qstart, qend)
            s = other.backbone_at(qstart, qend)

            if len(q) > 0 and len(s) > 0:
                return csb.bio.utils.rmsd(q, s) / RANDOM_RMSD[ len(common) ]

        return None

    def mda_to(self, other, min_overlap=5):

        common = self.overlap(other)

        if len(common) >= min_overlap:

            qstart, qend = min(common), max(common)

            q = self.torsion_at(qstart, qend)
            s = other.torsion_at(qstart, qend)

            if len(q) > 0 and len(s) > 0:

                pairs = list(zip(q, s))                                        # zip objects are not sliceable on Python 3
                maxphi = max(numpy.abs(i.phi - j.phi) for i, j in pairs[1:])   # phi: 2 .. L
                maxpsi = max(numpy.abs(i.psi - j.psi) for i, j in pairs[:-1])  # psi: 1 .. L-1

                return max(maxphi, maxpsi)

        return None

    def to_rosetta(self, source, qstart=None, qend=None, weight=None):
        """
        @deprecated: this method will be deleted soon. Use
        L{csb.bio.fragments.rosetta.OutputBuilder} instead.
        """
        stream = csb.io.MemoryStream()

        if weight is None:
            weight = self.probability
        if not qstart:
            qstart = self.qstart
        if not qend:
            qend = self.qend

        source.compute_torsion()
        chain = self.chain_at(source, qstart, qend)

        for i, r in enumerate(chain.residues):

            acc = self.source_id[:4]
            ch = self.source_id[4].upper()

            start = qstart - self.qstart + self.start + i
            aa = r.type
            ss = 'L'
            phi, psi, omega = 0, 0, 0
            if r.torsion.phi:
                phi = r.torsion.phi
            if r.torsion.psi:
                psi = r.torsion.psi
            if r.torsion.omega:
                omega = r.torsion.omega

            stream.write(' {0:4} {1:1} {2:>5} {3!s:1} {4!s:1} {5:>8.3f} {6:>8.3f} {7:>8.3f} {8:>8.3f}\n'.format(acc, ch, start, aa, ss, phi, psi, omega, weight))

        return stream.getvalue()

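
# ---------------------------------------------------------------------------
# Usage sketch (illustrative, hypothetical fragments): comparing two
# overlapping assignments with the metrics defined above; "frag1" and "frag2"
# are assumed to be Assignment instances on the same target.
#
#     shared = frag1.overlap(frag2)        # set of common target ranks
#     if len(shared) >= 5:
#         print(frag1.rmsd_to(frag2), frag1.nrmsd_to(frag2))
# ---------------------------------------------------------------------------
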
class ClusterExhaustedError(ValueError):
    pass

class ClusterEmptyError(ClusterExhaustedError):
    pass

class ClusterDivergingError(RuntimeError):
    pass

class FragmentCluster(object):
    """
    Provides clustering/filtering of the fragments, covering a common residue
    in the target. Clustering is done via iterative shrinking of the cluster.
    At each iteration, node rejection (deletion) is attempted for each node. The
    node rejection, causing the most significant drop in the average pairwise
    distance (RMSD) in the cluster, is retained. This procedure is repeated
    until: 1) the average pairwise RMSD drops below the C{threshold} (converged),
    2) the cluster gets exhausted or 3) node rejection no longer
    causes a drop in the average distance (not converging).

    @param items: cluster members
    @type items: iterable of L{ClusterNode}s
    @param threshold: RMSD threshold; continue shrinking until the mean distance
                      drops below this value (default=1.5)
    @type threshold: float
    @param connectedness: use only nodes which are connected to at least c% of all
                          initial nodes (default=0.5, that means 50%)
    @type connectedness: float
    """

    MIN_LENGTH = 6

    def __init__(self, items, threshold=1.5, connectedness=0.5):

        items = set(i for i in items if i.fragment.length >= FragmentCluster.MIN_LENGTH)

        self._matrix = {}
        self._threshold = float(threshold)
        self._connectedness = float(connectedness)

        for i in items:

            self._matrix[i] = {}
            conn = 0.0

            for j in items:
                distance = i.distance(j)
                if distance is not None:
                    conn += 1
                    self._matrix[i][j] = distance

            if conn / len(items) < self.connectedness:
                # reject i as a first class node
                del self._matrix[i]

        self._items = set(self._matrix.keys())

        if len(self._items) < 1:
            raise ClusterEmptyError()

        self._initcount = self.count

    @property
    def count(self):
        return len(self._items)

    @property
    def items(self):
        return tuple(self._items)

    @property
    def fragments(self):
        return [i.fragment for i in self._items]

    @property
    def threshold(self):
        return self._threshold
    @threshold.setter
    def threshold(self, value):
        self._threshold = float(value)

    @property
    def connectedness(self):
        return self._connectedness

    def _distances(self, skip=None):

        d = []

        for i in self._matrix:
            if skip is i:
                continue

            for j in self._matrix[i]:
                if skip is not j:
                    d.append(self._matrix[i][j])

        return d

    def _distance(self, i, j):

        if j in self._matrix[i]:
            return self._matrix[i][j]
        else:
            return None

    def mean(self, skip=None):
        """
        @return: the current mean distance in the cluster
        @rtype: float
        """

        d = self._distances(skip=skip)

        if len(d) > 0:
            return numpy.mean(d)
        else:
            raise ClusterExhaustedError()

    def centroid(self):
        """
        @return: the current representative fragment
        @rtype: L{ClusterRep}

        @note: the cluster rep is the node with the lowest average distance
        to all other nodes. If a fixed fragment exists, structurally similar
        to the rep, but longer, this fragment may be suggested as an alternative
        (see also L{ClusterRep}).
        """

        alt = None
        cen = None
        avg = None

        for i in self._matrix:

            curravg = numpy.mean(list(self._matrix[i].values()))

            if avg is None or curravg < avg:
                avg = curravg
                cen = i
            elif curravg == avg:
                if i.fragment.length > cen.fragment.length:
                    cen = i

        d = self._distances()
        mean = numpy.mean(d)
        cons = sum(1.0 for i in d if i <= self.threshold) / len(d)

        for i in self._matrix:
            if i is not cen and i.fixed and i.fragment.length > cen.fragment.length:
                distance = self._distance(i, cen)
                if distance is not None and distance < 0.5 * self.threshold:
                    if alt is None or alt.fragment.length < i.fragment.length:
                        alt = i

        return ClusterRep(cen, mean, cons, len(self._matrix[cen]), alternative=alt,
                          rejections=(self._initcount - self.count))

    def reject(self, item):
        """
        Remove C{item} from the cluster.

        @type item: L{ClusterNode}
        @raise ClusterExhaustedError: if this is the last remaining item
        """

        if self.count == 1:
            raise ClusterExhaustedError()

        assert not item.fixed

        for i in self._matrix:
            if item in self._matrix[i]:
                del self._matrix[i][item]

        del self._matrix[item]
        self._items.remove(item)

    def shrinkone(self):
        """
        Shrink the cluster by a single node.

        @return: True on successful shrink, False otherwise (e.g. if
                 already converged)
        @rtype: bool

        @raise ClusterExhaustedError: if exhausted
        @raise ClusterDivergingError: if not converging
        """

        mean = self.mean()
        if mean <= self.threshold or self.count == 1:
            return False                    # already shrunk enough

        m = {}

        for i in self._matrix:
            if not i.fixed:
                newmean = self.mean(skip=i)
                m[newmean] = i

        if len(m) == 0:                     # only fixed items remaining
            raise ClusterExhaustedError()

        newmean = min(m)

        if newmean > mean:
            raise ClusterDivergingError()   # can't converge, usually when fixed items are too far away from the average
        elif newmean < mean:
            junk = m[newmean]
            self.reject(junk)
            return True                     # successful shrink
        else:
            return False                    # converged

    def shrink(self, minitems=2):
        """
        Start automatic shrinking.

        @param minitems: absolute minimum of the number of nodes in the cluster
        @type minitems: int

        @return: cluster's representative: the node with the lowest average
                 distance to all other nodes in the cluster
        @rtype: L{ClusterRep}

        @raise ClusterExhaustedError: if C{self.count} < C{minitems} and
               still not converged
        """

        if self.count > minitems:

            while self.shrinkone():
                if self.count <= minitems:
                    raise ClusterExhaustedError()
        else:
            raise ClusterExhaustedError()

        return self.centroid()

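
# ---------------------------------------------------------------------------
# Usage sketch (illustrative, hypothetical data): clustering the fragments
# assigned at one target position; "assignments" is assumed to be an iterable
# of Assignment objects covering the same residue.
#
#     nodes = [ClusterNode(a) for a in assignments]
#     try:
#         rep = FragmentCluster(nodes, threshold=1.5).shrink(minitems=2)
#         print(rep.centroid.id, rep.confidence)
#     except (ClusterExhaustedError, ClusterDivergingError):
#         rep = None
# ---------------------------------------------------------------------------
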
class ClusterNode(object):
    """
    Cluster node.

    @param fragment: fragment
    @type fragment: L{Assignment}
    @param distance: distance metric (a L{Metrics} member, default is RMSD)
    @type distance: str
    @param fixed: mark this node as fixed (cannot be rejected)
    @type fixed: bool
    """

    def __init__(self, fragment, distance=Metrics.RMSD, fixed=False):

        if fixed and fragment.length < FragmentCluster.MIN_LENGTH:
            raise ValueError("Can't fix a short fragment")

        self.fragment = fragment
        self.fixed = bool(fixed)

        self._distance = getattr(self.fragment, distance)

    def distance(self, other):
        """
        @return: the distance between self and another node
        @type other: L{ClusterNode}
        @rtype: float
        """
        return self._distance(other.fragment)

class ClusterRep(object):
    """
    Cluster's representative (centroid) node. This object carries the
    result of shrinking itself.

    @param centroid: rep node
    @type centroid: L{ClusterNode}
    @param mean: current mean distance in the cluster
    @type mean: float
    @param consistency: percentage of pairwise distances below the RMSD C{threshold}
    @type consistency: float
    @param count: current number of nodes in the cluster
    @type count: int
    @param rejections: total number of rejections
    @type rejections: int
    @param alternative: suggested cluster rep alternative (e.g. structurally
                        similar to the centroid, but longer)
    @type alternative: L{ClusterNode}
    """

    def __init__(self, centroid, mean, consistency, count, rejections=0, alternative=None):

        if isinstance(centroid, ClusterNode):
            centroid = centroid.fragment
        if isinstance(alternative, ClusterNode):
            alternative = alternative.fragment

        self._centroid = centroid
        self._alternative = alternative
        self._mean = mean
        self._consistency = consistency
        self._count = count
        self._rejections = rejections

    @property
    def confidence(self):
        """
        Confidence of assignment: log10(count) * consistency
        """
        if self.count is None or self.count <= 0 or self.consistency is None:
            return 0
        else:
            return numpy.log10(self.count) * self.consistency

    @property
    def centroid(self):
        return self._centroid

    @property
    def alternative(self):
        return self._alternative

    @property
    def has_alternative(self):
        return self._alternative is not None

    @property
    def mean(self):
        return self._mean

    @property
    def consistency(self):
        return self._consistency

    @property
    def count(self):
        return self._count

    @property
    def rejections(self):
        return self._rejections

    def exchange(self):
        """
        If an alternative is available, swap the centroid and the alternative.
        """

        if self._alternative is not None:

            centroid = self._centroid
            self._centroid = self._alternative
            self._alternative = centroid

    def to_rosetta(self, source):
        """
        @deprecated: this method is obsolete and will be deleted soon
        """
        return self.centroid.to_rosetta(source, weight=self.confidence)

class AdaptedAssignment(object):

    @staticmethod
    def with_overhangs(center, start, end, overhang=1):

        if center.centroid.qstart <= (start - overhang):
            start -= overhang
        elif center.centroid.qstart < start:
            start = center.centroid.qstart

        if center.centroid.qend >= (end + overhang):
            end += overhang
        elif center.centroid.qend > end:
            end = center.centroid.qend      # target (query) coordinate, matching the qstart branch above

        return AdaptedAssignment(center, start, end)

    def __init__(self, center, qstart, qend):

        if qstart < center.centroid.qstart:
            raise ValueError(qstart)
        if qend > center.centroid.qend:
            raise ValueError(qend)

        self._qstart = qstart
        self._qend = qend
        self._center = center

    @property
    def fragment(self):
        return self._center.centroid

    @property
    def center(self):
        return self._center

    @property
    def confidence(self):
        return self._center.confidence

    @property
    def qstart(self):
        return self._qstart

    @property
    def qend(self):
        return self._qend

    @property
    def backbone(self):
        return self.fragment.backbone_at(self.qstart, self.qend)

    def chain(self, source):
        return self.fragment.chain_at(source, self.qstart, self.qend)

    def to_rosetta(self, source):
        return self.fragment.to_rosetta(source, self.qstart, self.qend, self.confidence)

class SmoothFragmentMap(csb.core.AbstractContainer):

    def __init__(self, length, centroids):

        if not length > 0:
            raise ValueError(length)

        self._length = int(length)
        self._slots = set(range(1, self._length + 1))
        self._map = {}

        centers = list(centroids)
        centers.sort(key=lambda i: i.confidence, reverse=True)

        for c in centers:
            self.assign(c)

    @property
    def _children(self):
        return self._map

    def assign(self, center):

        for r in range(center.centroid.qstart, center.centroid.qend + 1):
            if r in self._slots:
                self._map[r] = center
                self._slots.remove(r)

    def patches(self):

        center = None
        start = None
        end = None

        for r in range(1, self._length + 1):

            if center is None:
                if r in self._map:
                    center = self._map[r]
                    start = end = r
                else:
                    center = None
                    start = end = None
            else:
                if r in self._map:
                    if self._map[r] is center:
                        end = r
                    else:
                        yield AdaptedAssignment(center, start, end)
                        center = self._map[r]
                        start = end = r
                else:
                    yield AdaptedAssignment(center, start, end)
                    center = None
                    start = end = None

class ResidueEventInfo(object):

    def __init__(self, rank, confidence=None, count=None, confident=True, rep=None):

        self.rank = rank
        self.confidence = confidence
        self.confident = confident
        self.count = count
        self.rep = rep

class RosettaFragsetFactory(object):
    """
    Simplifies the construction of fragment libraries.
    """

    def __init__(self):
        import csb.bio.fragments.rosetta as rosetta
        self.rosetta = rosetta

    def make_fragset(self, target):
        """
        Build a fragment library given a L{Target} and its L{Assignment}s.

        @param target: target protein
        @type target: L{Target}

        @rtype: L{RosettaFragmentMap}
        """

        frag_factory = self.rosetta.RosettaFragment
        fragments = list(map(frag_factory.from_object, target.matches))
        #fragments = [ frag_factory.from_object(f) for f in target.matches if f.length >= 6 ]
        fragments.sort()

        return self.rosetta.RosettaFragmentMap(fragments, target.length)

    def make_chopped(self, fragments, window):
        """
        Build a fixed-length fragment library from a list of
        variable-length fragments.

        @param fragments: source fragments
        @type fragments: iterable of L{RosettaFragment}s
        @param window: fixed-length fragment size (for classic Rosetta: choose 9)
        @type window: int

        @return: fixed-length fragment library
        @rtype: L{RosettaFragmentMap}
        """

        frags = []

        for f in fragments:
            for qs in range(f.qstart, f.qend - window + 1):
                frags.append(f.subregion(qs, qs + window - 1))

        return self.rosetta.RosettaFragmentMap(frags)

    def make_combined(self, target, filling, threshold=0.5, callback=None):
        """
        Complement C{target}'s assignments with C{filling} (e.g. Rosetta fragments).
        The regions to be complemented are determined by calculating the confidence
        at each residue (by filtering).

        @param target: target protein
        @type target: L{Target}
        @param filling: additional fragments to place in the low-confidence regions
        @type filling: L{RosettaFragmentMap} or iterable of L{RosettaFragment}
        @param threshold: confidence threshold
        @type threshold: float

        @return: complemented fragment library
        @rtype: L{RosettaFragmentMap}
        """

        fragmap = self.make_fragset(target)
        covered = set()

        for r in target.residues:

            if r.assignments.length == 0:
                if callback:
                    callback(ResidueEventInfo(r.native.rank, None, 0, False))
                continue

            cluster = r.filter()
            if cluster is None:
                if callback:
                    callback(ResidueEventInfo(r.native.rank, 0, 0, False))
                continue

            if cluster.confidence >= threshold:
                covered.add(r.native.rank)
            elif callback:
                callback(ResidueEventInfo(r.native.rank, cluster.confidence, cluster.count, False))

        for r in target.residues:
            if r.native.rank not in covered:    # true for gaps and low-confidence residues
                fragmap.mark_unconfident(r.native.rank)

        for frag in filling:
            fragmap.complement(frag)

        return fragmap

    def make_filtered(self, target, extend=False, callback=None):
        """
        Build a filtered fragment library (by clustering), containing only
        representative fragments (cluster centroids).

        @param target: target protein
        @type target: L{Target}
        @param extend: if True, pick alternative reps if available
        @type extend: bool

        @return: filtered fragment library
        @rtype: L{RosettaFragmentMap}
        """

        fragments = []

        for r in target.residues:
            if r.assignments.length == 0:
                continue

            cluster = r.filter(extend=extend)
            if cluster is None:
                continue

            if extend and cluster.has_alternative:
                best = cluster.alternative
            else:
                best = cluster.centroid

            fragment = self.rosetta.RosettaFragment.from_object(best)
            fragments.append(fragment)
            if callback:
                callback(ResidueEventInfo(r.native.rank, cluster.confidence, cluster.count, rep=cluster.centroid))

        fragments.sort()
        return self.rosetta.RosettaFragmentMap(fragments, target.length)

    def mix(self, *fragsets):
        """
        Mix fragments from multiple libraries.

        @type fragsets: L{RosettaFragmentMap}
        @return: mixed fragment library
        @rtype: L{RosettaFragmentMap}
        """

        fragments = []
        length = 0

        for fragset in fragsets:
            if fragset._length > length:
                length = fragset._length

            for fragment in fragset:
                fragments.append(fragment)

        return self.rosetta.RosettaFragmentMap(fragments, length)

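
# ---------------------------------------------------------------------------
# Usage sketch (illustrative, hypothetical objects): building a filtered
# fragment library and chopping it into classic 9-mers; "target" is assumed
# to be a populated Target with assignments.
#
#     factory = RosettaFragsetFactory()
#     fragset = factory.make_filtered(target, extend=True)
#     ninemers = factory.make_chopped(fragset, window=9)
# ---------------------------------------------------------------------------
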
class BenchmarkAdapter(object):

    class Connection(object):

        FACTORY = None
        DSN = None

        def __init__(self, factory=None, dsn=None):

            self.factory = factory or self.__class__.FACTORY
            self.cs = dsn or self.__class__.DSN
            self.connection = None
            self.cursor = None

        def __enter__(self):

            self.connection = self.factory(self.cs)
            try:
                self.cursor = self.connection.cursor()
            except:
                self.connection.close()
                raise
            return self

        def __exit__(self, *args):
            try:
                if not self.cursor.closed:
                    self.cursor.close()
            finally:
                if not self.connection.closed:
                    self.connection.close()

    def __init__(self, pdb_paths, connection_string=None, factory=AssignmentFactory()):

        self._pdb = pdb_paths
        self._connection = None

        from csb.bio.io.wwpdb import find, StructureParser
        self._parser = StructureParser
        self._find = find
        self._factory = factory

        try:
            import psycopg2.extras
        except ImportError:
            raise RuntimeError('Please install the psycopg2 module first')

        if connection_string is None:
            connection_string = self.connection_string()

        BenchmarkAdapter.Connection.FACTORY = psycopg2.extras.DictConnection
        BenchmarkAdapter.Connection.DSN = connection_string

    @staticmethod
    def connection_string(database='FragmentBenchmarks', host='', username='', password=''):

        fields = ['dbname={0}'.format(database)]

        if host:
            fields.append('host={0}'.format(host))
        if username:
            fields.append('user={0}'.format(username))
            fields.append('password={0}'.format(password))

        return ' '.join(fields)

    def targets(self, benchmark_id):

        with BenchmarkAdapter.Connection() as db:

            db.cursor.callproc('reporting."GetTargets"', (benchmark_id,))
            return db.cursor.fetchall()

    def target_details(self, target_id):

        with BenchmarkAdapter.Connection() as db:

            db.cursor.callproc('reporting."GetTargetDetails"', (target_id,))
            return db.cursor.fetchall()

    def assignments(self, target_id, type):

        with BenchmarkAdapter.Connection() as db:

            db.cursor.callproc('reporting."GetAssignments"', (target_id, type))
            return db.cursor.fetchall()

    def scores(self, benchmark_id, type):

        with BenchmarkAdapter.Connection() as db:

            db.cursor.callproc('reporting."GetScores"', (benchmark_id, type))
            return db.cursor.fetchall()

    def centroids(self, benchmark_id):

        with BenchmarkAdapter.Connection() as db:

            db.cursor.callproc('reporting."GetCentroids"', (benchmark_id,))
            return db.cursor.fetchall()

    def target_segments(self, target_id):

        with BenchmarkAdapter.Connection() as db:

            db.cursor.callproc('reporting."GetTargetSegments"', (target_id,))
            data = db.cursor.fetchall()

            return [ TargetSegment(row['Start'], row['End'], row['Count']) for row in data ]

    def structure(self, accession, chain=None):

        pdbfile = self._find(accession, self._pdb)

        if not pdbfile and chain:
            pdbfile = self._find(accession + chain, self._pdb)

        if not pdbfile:
            raise IOError('{0} not found here: {1}'.format(accession, self._pdb))

        return self._parser(pdbfile).parse_structure()

    def prediction(self, target_id, type):

        info = self.target_details(target_id)
        if not info:
            raise ValueError('No such Target ID in the database: {0}'.format(target_id))
        row = info[0]

        id = row["Accession"]
        length = float(row["Length"])
        overlap = float(row["MaxOverlap"]) / (length or 1.)

        native = self.structure(id[:4], id[4]).chains[id[4]]
        segments = self.target_segments(target_id)
        target = self._factory.target(id, length, native.residues, overlap, segments)

        source = None

        for row in self.assignments(target_id, type):

            src_accession = row['Source'][:4]
            src_chain = row['Source'][4]

            if source is None or source.accession != src_accession:
                try:
                    source = self.structure(src_accession, src_chain)
                except (IOError, csb.bio.structure.Broken3DStructureError) as ex:
                    target.errors.append(ex)
                    continue

            if src_chain == '_':
                frag_chain = source.first_chain
            else:
                frag_chain = source.chains[src_chain]
            if not frag_chain.has_torsion:
                frag_chain.compute_torsion()

            fragment = self._factory.assignment(
                                source=frag_chain,
                                start=row['SourceStart'],
                                end=row['SourceEnd'],
                                id=row['FragmentName'],
                                qstart=row['Start'],
                                qend=row['End'],
                                probability=row['Probability'],
                                score=row['Score'],
                                neff=row['Neff'],
                                rmsd=row['RMSD'],
                                tm_score=row['TMScore'],
                                segment=row['SegmentStart'],
                                internal_id=row['InternalID'])

            target.assign(fragment)

        return target