Package csb :: Package bio :: Package hmm
[frames] | no frames]

Source Code for Package csb.bio.hmm

   1  """ 
   2  HHpred and Hidden Markov Model APIs. 
   3   
   4  This package defines the abstractions for working with HHpred's HMMs and 
   5  hit lists. L{ProfileHMM} is the most important object of this module. 
   6  It describes a sequence profile hidden Markov model in the way HHpred 
   7  sees this concept: 
   8   
   9      - a profile is composed of a list of L{HMMLayer}s, which contain a 
  10        number of L{State}s 
  11      - these L{States} can be of different types: Match, Insertion Deletion, etc. 
  12      - a profile contains a multiple alignment, from which it is derived 
  13      - this multiple alignment is an A3M (condensed) Alignment, where the  
  14        first sequence is a master sequence 
  15      - the match states in all layers correspond to the residues of the master 
  16        sequence 
  17   
  18  L{ProfileHMM} objects provide list-like access to their layers: 
  19   
  20      >>> hmm.layers[1] 
  21      <HMMLayer>    # first layer: layer at master residue=1 
  22       
  23  Every layer provides dictionary-like access to its states: 
  24   
  25      >>> layer[States.Match] 
  26      <Match State> 
  27       
  28  and every state provides dictionary-like access to its transitions to other 
  29  states: 
  30   
  31      >>> state = hmm.layers[1][States.match] 
  32      >>> state.transitions[States.Insertion] 
  33      <Transition>       # Match > Insertion 
  34      >>> transition.predecessor 
  35      <Match State>      # source state 
  36      >>> transition.successor 
  37      <Insertion State>  # target state 
  38   
  39  Whether this transition points to a state at the same (i) or the next layer 
  40  (i+1) depends on the semantics of the source and the target states. 
  41   
  42  Building HMMs from scratch is supported through a number of C{append} methods 
  43  at various places: 
  44   
  45      >>> layer = HMMLayer(...) 
  46      >>> layer.append(State(...)) 
  47      >>> hmm.layers.append(layer) 
  48   
  49  See L{HMMLayersCollection}, L{HMMLayer}, L{EmissionTable} and L{TransitionTable} 
  50  for details. 
  51  """ 
  52   
  53  import sys 
  54  import math 
  55   
  56  import csb.core 
  57  import csb.io 
  58  import csb.bio.structure as structure 
  59  import csb.bio.sequence as sequence 
  60   
  61  from csb.core import Enum 
62 63 64 -class UnobservableStateError(AttributeError):
65 pass
66
67 -class StateNotFoundError(csb.core.ItemNotFoundError):
68 pass
69
70 -class TransitionNotFoundError(StateNotFoundError):
71 pass
72
73 -class LayerIndexError(csb.core.CollectionIndexError):
74 pass
75
76 -class StateExistsError(KeyError):
77 pass
78
79 -class TransitionExistsError(KeyError):
80 pass
81
82 -class EmissionExistsError(KeyError):
83 pass
84
85 -class HMMArgumentError(ValueError):
86 pass
87
88 89 -class States(csb.core.enum):
90 """ 91 Enumeration of HMM state types 92 """ 93 Match='M'; Insertion='I'; Deletion='D'; Start='S'; End='E'
94
95 -class ScoreUnits(csb.core.enum):
96 """ 97 Enumeration of HMM emission and transition score units 98 """ 99 LogScales='LogScales'; Probability='Probability'
100 101 BACKGROUND = [ 0.076627178753322270, 0.018866884241976509, 0.053996136712517316, 102 0.059788009880742142, 0.034939432842683173, 0.075415244982547675, 103 0.036829356494115069, 0.050485048600600511, 0.059581159080509941, 104 0.099925728794059046, 0.021959667190729986, 0.040107059298840765, 105 0.045310838527464106, 0.032644867589507229, 0.051296350550656143, 106 0.046617000834108295, 0.071051060827250878, 0.072644631719882335, 107 0.012473412286822654, 0.039418044025976547 ] 108 """ 109 Background amino acid probabilities 110 """ 111 112 RELATIVE_SA = { 'A': 0.02, 'B': 0.14, 'C': 0.33, 'D': 0.55, 'E': 1.00 } 113 """ 114 Relative solvent accessibility codes (upper bounds) 115 """
116 117 118 -class ProfileHMM(object):
119 """ 120 Describes a protein profile Hidden Markov Model. 121 Optional parameters: 122 123 @param units: defines the units of the transition and emission scores 124 @type units: L{ScoreUnits} 125 @param scale: the scaling factor used to convert emission/transition 126 probabilities 127 @type scale: float 128 @param logbase: the base of the logarithm used for scaling the emission and 129 transition probabilities 130 @type logbase: float 131 """ 132
133 - def __init__(self, units=ScoreUnits.LogScales, scale=-1000., logbase=2):
134 135 self._name = None 136 self._id = None 137 self._family = None 138 self._length = ProfileLength(0, 0) 139 self._alignment = None 140 self._consensus = None 141 self._dssp = None 142 self._dssp_solvent = None 143 self._psipred = None 144 self._effective_matches = None 145 self._evd = EVDParameters(None, None) 146 self._version = None 147 self._pseudocounts = False 148 self._emission_pseudocounts = False 149 self._transition_pseudocounts = False 150 self._layers = HMMLayersCollection() 151 self._start = State(States.Start) 152 self._start_insertion = None 153 self._end = State(States.End) 154 self._scale = scale 155 self._logbase = logbase 156 if units is None: 157 self._score_units = ScoreUnits.LogScales 158 else: 159 self._score_units = units
160 161 @property
162 - def name(self):
163 """ 164 Profile name (NAME) 165 @rtype: str 166 """ 167 return self._name
168 @name.setter
169 - def name(self, value):
170 self._name = str(value)
171 172 @property
173 - def id(self):
174 """ 175 Profile entry ID (FILE) 176 @rtype: str 177 """ 178 return self._id
179 @id.setter
180 - def id(self, value):
181 self._id = str(value)
182 183 @property
184 - def family(self):
185 """ 186 Alternative entry ID (FAM) 187 @rtype: str 188 """ 189 return self._family
190 @family.setter
191 - def family(self, value):
192 self._family = str(value)
193 194 @property
195 - def length(self):
196 """ 197 Profile length 198 @rtype: L{ProfileLength} 199 """ 200 return self._length
201 @length.setter
202 - def length(self, value):
203 if not isinstance(value, ProfileLength): 204 raise TypeError(value) 205 self._length = value
206 207 @property
208 - def alignment(self):
209 """ 210 Source multiple alignment 211 @rtype: L{A3MAlignment} 212 """ 213 return self._alignment
214 @alignment.setter
215 - def alignment(self, value):
216 if not isinstance(value, sequence.A3MAlignment): 217 raise TypeError(value) 218 self._alignment = value
219 220 @property
221 - def consensus(self):
222 """ 223 Consensus sequence 224 @rtype: L{AbstractSequence} 225 """ 226 return self._consensus
227 @consensus.setter
228 - def consensus(self, value):
229 if not isinstance(value, sequence.AbstractSequence): 230 raise TypeError(value) 231 self._consensus = value
232 233 @property
234 - def dssp(self):
235 """ 236 DSSP (calculated) secondary structure 237 @rtype: L{SecondaryStructure} 238 """ 239 return self._dssp
240 @dssp.setter
241 - def dssp(self, value):
242 if not isinstance(value, structure.SecondaryStructure): 243 raise TypeError(value) 244 self._dssp = value
245 246 @property
247 - def dssp_solvent(self):
248 """ 249 Solvent accessibility values 250 @rtype: str 251 """ 252 return self._dssp_solvent
253 @dssp_solvent.setter
254 - def dssp_solvent(self, value):
255 self._dssp_solvent = str(value)
256 257 @property
258 - def psipred(self):
259 """ 260 PSIPRED (predicted) secondary structure 261 @rtype: L{SecondaryStructure} 262 """ 263 return self._psipred
264 @psipred.setter
265 - def psipred(self, value):
266 if not isinstance(value, structure.SecondaryStructure): 267 raise TypeError(value) 268 self._psipred = value
269 270 @property
271 - def effective_matches(self):
272 """ 273 Number of effective matches (NEFF) 274 """ 275 return self._effective_matches
276 @effective_matches.setter
277 - def effective_matches(self, value):
278 self._effective_matches = value
279 280 @property
281 - def evd(self):
282 """ 283 Extreme-value distribution parameters (EVD) 284 @rtype: L{EVDParameters} 285 """ 286 return self._evd
287 @evd.setter
288 - def evd(self, value):
289 if not isinstance(value, EVDParameters): 290 raise TypeError(value) 291 self._evd = value
292 293 @property
294 - def version(self):
295 """ 296 Format version number (HHsearch) 297 @rtype: str 298 """ 299 return self._version
300 @version.setter
301 - def version(self, value):
302 self._version = str(value)
303 304 @property
305 - def pseudocounts(self):
306 """ 307 @rtype: bool 308 """ 309 return self._pseudocounts
310 @pseudocounts.setter
311 - def pseudocounts(self, value):
312 self._pseudocounts = bool(value)
313 314 @property
315 - def emission_pseudocounts(self):
316 """ 317 @rtype: bool 318 """ 319 return self._emission_pseudocounts
320 @emission_pseudocounts.setter
321 - def emission_pseudocounts(self, value):
322 self._emission_pseudocounts = bool(value)
323 324 @property
325 - def transition_pseudocounts(self):
326 """ 327 @rtype: bool 328 """ 329 return self._transition_pseudocounts
330 @transition_pseudocounts.setter
331 - def transition_pseudocounts(self, value):
332 self._transition_pseudocounts = bool(value)
333 334 @property
335 - def layers(self):
336 """ 337 List-like access to the HMM's layers 338 @rtype: L{HMMLayersCollection} 339 """ 340 return self._layers
341 342 @property
343 - def start(self):
344 """ 345 Start state (at the start layer) 346 @rtype: L{State} 347 """ 348 return self._start
349 @start.setter
350 - def start(self, value):
351 if value is None or (isinstance(value, State) and value.type == States.Start): 352 self._start = value 353 else: 354 raise TypeError(value)
355 356 @property
357 - def start_insertion(self):
358 """ 359 Insertion state at the start layer 360 @rtype: L{State} 361 """ 362 return self._start_insertion
363 @start_insertion.setter
364 - def start_insertion(self, value):
365 if value is None or (isinstance(value, State) and value.type == States.Insertion): 366 self._start_insertion = value 367 else: 368 raise TypeError(value)
369 370 @property
371 - def end(self):
372 """ 373 Final state (at the end layer) 374 @rtype: L{State} 375 """ 376 return self._end
377 @end.setter
378 - def end(self, value):
379 if value is None or (isinstance(value, State) and value.type == States.End): 380 self._end = value 381 else: 382 raise TypeError(value)
383 384 @property
385 - def scale(self):
386 """ 387 Score scaling factor 388 @rtype: float 389 """ 390 return self._scale
391 392 @property
393 - def logbase(self):
394 """ 395 Base of the logarithm used for score scaling 396 @rtype: float 397 """ 398 return self._logbase
399 400 @property
401 - def score_units(self):
402 """ 403 Current score units 404 @rtype: L{ScoreUnits} member 405 """ 406 return self._score_units
407 408 @property
409 - def residues(self):
410 """ 411 List of representative residues, attached to each layer 412 @rtype: collection of L{Residue} 413 """ 414 res = [layer.residue for layer in self.layers] 415 return csb.core.ReadOnlyCollectionContainer( 416 res, type=structure.Residue, start_index=1)
417 418 @property
419 - def all_layers(self):
420 """ 421 A list of layers including start and start_insertion 422 @rtype: list of L{HMMLayer} 423 """ 424 complete_layers = [] 425 first_layer = HMMLayer(rank=0, residue=None) 426 first_layer.append(self.start) 427 if self.start_insertion: 428 first_layer.append(self.start_insertion) 429 complete_layers.append(first_layer) 430 for layer in self.layers: 431 complete_layers.append(layer) 432 433 return complete_layers
434 435 @property
436 - def has_structure(self):
437 """ 438 True if this profile contains structural data 439 @rtype: bool 440 """ 441 has = False 442 for layer in self.layers: 443 if layer.residue.has_structure: 444 return True 445 return has
446
447 - def serialize(self, file_name):
448 """ 449 Serialize this HMM to a file. 450 451 @param file_name: target file name 452 @type file_name: str 453 """ 454 rec = sys.getrecursionlimit() 455 sys.setrecursionlimit(10000) 456 csb.io.Pickle.dump(self, open(file_name, 'wb')) 457 sys.setrecursionlimit(rec)
458 459 @staticmethod
460 - def deserialize(file_name):
461 """ 462 De-serialize an HMM from a file. 463 464 @param file_name: source file name (pickle) 465 @type file_name: str 466 """ 467 rec = sys.getrecursionlimit() 468 sys.setrecursionlimit(10000) 469 try: 470 return csb.io.Pickle.load(open(file_name, 'rb')) 471 finally: 472 sys.setrecursionlimit(rec)
473
474 - def _convert(self, units, score, scale, logbase):
475 476 if units == ScoreUnits.Probability: 477 return logbase ** (score / scale) 478 elif units == ScoreUnits.LogScales: 479 if score == 0: 480 #score = sys.float_info.min 481 return None 482 return math.log(score, logbase) * scale 483 else: 484 raise ValueError('Unknown target unit {0}'.format(units))
485
486 - def to_hmm(self, output_file=None, convert_scores=False):
487 """ 488 Dump the profile in HHM format. 489 490 @param output_file: the output file name 491 @type output_file: str 492 @param convert_scores: if True, forces automatic convertion to 493 L{ScoreUnits}.LogScales, which is required 494 by the output file format 495 @type convert_scores: bool 496 """ 497 from csb.bio.io.hhpred import HHMFileBuilder 498 499 if convert_scores: 500 self.convert_scores(ScoreUnits.LogScales) 501 502 temp = csb.io.MemoryStream() 503 504 builder = HHMFileBuilder(temp) 505 builder.add_hmm(self) 506 507 data = temp.getvalue() 508 temp.close() 509 510 if not output_file: 511 return data 512 else: 513 with csb.io.EntryWriter(output_file, close=False) as out: 514 out.write(data)
515
516 - def segment(self, start, end):
517 """ 518 Extract a sub-segment of the profile. 519 520 @param start: start layer of the segment (rank) 521 @type start: int 522 @param end: end layer of the segment (rank) 523 @type end: int 524 525 @return: a deepcopy of the extracted HMM segment 526 @rtype: L{ProfileHMMSegment} 527 """ 528 return ProfileHMMSegment(self, start, end)
529
530 - def subregion(self, start, end):
531 532 return ProfileHMMRegion(self, start, end)
533
534 - def add_emission_pseudocounts(self, *a, **k):
535 """ 536 See L{csb.bio.hmm.pseudocounts.PseudocountBuilder} 537 """ 538 from csb.bio.hmm.pseudocounts import PseudocountBuilder 539 PseudocountBuilder(self).add_emission_pseudocounts(*a, **k)
540
541 - def add_transition_pseudocounts(self, *a, **k):
542 """ 543 See L{csb.bio.hmm.pseudocounts.PseudocountBuilder} 544 """ 545 from csb.bio.hmm.pseudocounts import PseudocountBuilder 546 PseudocountBuilder(self).add_transition_pseudocounts(*a, **k)
547
548 - def structure(self, chain_id=None, accession=None):
549 """ 550 Extract the structural information from the HMM. 551 552 @param accession: defines the accession number of the structure 553 @type accession: str 554 @param chain_id: defines explicitly the chain identifier 555 @type chain_id: str 556 557 @return: a shallow L{Structure} wrapper around the residues in the HMM. 558 @rtype: L{Structure} 559 """ 560 struct = structure.Structure(accession or self.id) 561 chain = self.chain(chain_id) 562 struct.chains.append(chain) 563 564 return struct
565
566 - def chain(self, chain_id=None):
567 """ 568 Extract the structural information from the HMM. 569 570 @param chain_id: defines explicitly the chain identifier 571 @type chain_id: str 572 573 @return: a shallow L{Chain} wrapper around the residues in the HMM. 574 @rtype: L{Chain} 575 """ 576 if chain_id is None: 577 if self.id: 578 chain_id = self.id.rstrip()[-1] 579 else: 580 chain_id = '_' 581 582 return structure.Chain(chain_id, 583 type=sequence.SequenceTypes.Protein, 584 residues=self.residues)
585
586 - def emission_profile(self):
587 """ 588 Extract the emission scores of all match states in the profile. 589 The metric of the emission scores returned depends on the current 590 hmm.score_units setting - you may need to call hmm.convert_scores() 591 to adjust the hmm to your particular needs. 592 593 @return: a list of dictionaries; each dict key is a single amino acid 594 @rtype: list 595 """ 596 profile = [] 597 598 for layer in self.layers: 599 emission = {} 600 601 for aa in layer[States.Match].emission: 602 emission[str(aa)] = layer[States.Match].emission[aa] or 0.0 603 profile.append(emission) 604 605 return profile
606
607 - def convert_scores(self, units=ScoreUnits.Probability, method=None):
608 """ 609 Convert emission and transition scores to the specified units. 610 611 @param units: the target units for the conversion (a member of 612 L{ScoreUnits}). 613 @type units: L{csb.core.EnumItem} 614 @param method: if defined, implements the exact mathematical 615 transformation that will be applied. It must be a 616 function or lambda expression with the following 617 signature:: 618 def (target_units, score, scale, logbase) 619 620 and it has to return the score converted to 621 C{target_units}. If method performs a conversion from 622 probabilities to scaled logs, you should also update 623 C{hmm.scale} and C{hmm.logbase}. 624 @type method: function, lambda 625 """ 626 627 if self._score_units == units: 628 return 629 630 if method is not None: 631 convert = method 632 else: 633 convert = self._convert 634 635 for layer in self.layers: 636 for state_kind in layer: 637 state = layer[state_kind] 638 if not state.silent: 639 for residue in state.emission: 640 if state.emission[residue] is not None: 641 state.emission.update(residue, convert( 642 units, state.emission[residue], 643 self.scale, self.logbase)) 644 for residue in state.background: 645 if state.background[residue] is not None: 646 state.background.update(residue, convert( 647 units, state.background[residue], 648 self.scale, self.logbase)) 649 for tran_kind in state.transitions: 650 transition = state.transitions[tran_kind] 651 transition.probability = convert(units, transition.probability, 652 self.scale, self.logbase) 653 # The Neff-s are interger numbers and should not be transformed 654 # (except when writing the profile to a hhm file) 655 656 if self.start_insertion: 657 for t_it in self.start_insertion.transitions: 658 transition = self.start_insertion.transitions[t_it] 659 transition.probability = convert(units, transition.probability, 660 self.scale, self.logbase) 661 662 for residue in self.start_insertion.emission: 663 state = self.start_insertion 664 if state.emission[residue] is not None: 665 state.emission.update(residue, 666 convert(units, state.emission[residue], self.scale, self.logbase)) 667 state.background.update(residue, 668 convert(units, state.background[residue], self.scale, self.logbase)) 669 670 for tran_kind in self.start.transitions: 671 transition = self.start.transitions[tran_kind] 672 transition.probability = convert(units, 673 transition.probability, self.scale, self.logbase) 674 675 676 self._score_units = units
677
678 - def emission_similarity(self, other):
679 """ 680 Compute the Log-sum-of-odds score between the emission tables of self 681 and other (Soeding 2004). If no observable Match state is found at a 682 given layer, the Insertion state is used instead. 683 684 @note: This is not a full implementation of the formula since only 685 emission vectors are involved in the computation and any transition 686 probabilities are ignored. 687 688 @param other: the subject HMM 689 @type other: L{ProfileHMM} 690 691 @return: emission log-sum-of-odds similarity between C{self} and 692 C{other} 693 @rtype: float 694 695 @raise ValueError: when self and other differ in their length, when the 696 score_units are not Probability, or when no 697 observable states are present 698 """ 699 score = 1 700 701 if self.layers.length != other.layers.length or self.layers.length < 1: 702 raise ValueError('Both HMMs must have the same nonzero number of layers') 703 if self.score_units != ScoreUnits.Probability or \ 704 other.score_units != ScoreUnits.Probability: 705 raise ValueError('Scores must be converted to probabilities first.') 706 707 for q_layer, s_layer in zip(self.layers, other.layers): 708 709 try: 710 if States.Match in q_layer and not q_layer[States.Match].silent: 711 q_state = q_layer[States.Match] 712 else: 713 q_state = q_layer[States.Insertion] 714 715 if States.Match in s_layer and not s_layer[States.Match].silent: 716 s_state = s_layer[States.Match] 717 else: 718 s_state = s_layer[States.Insertion] 719 except csb.core.ItemNotFoundError: 720 raise ValueError('Query and subject must contain observable states ' 721 'at each layer') 722 723 emission_dotproduct = 0 724 725 for aa in q_state.emission: 726 727 q_emission = q_state.emission[aa] or sys.float_info.min 728 s_emission = s_state.emission[aa] or sys.float_info.min 729 emission_dotproduct += (q_emission * s_emission / 730 q_state.background[aa]) 731 732 score *= emission_dotproduct 733 734 return math.log(score)
735
736 - def _assign_secstructure(self):
737 """ 738 Attach references from each profile layer to the relevant DSSP secondary 739 structure element. 740 """ 741 assert self.dssp is not None 742 743 for motif in self.dssp: 744 for i in range(motif.start, motif.end + 1): 745 self.layers[i].residue.secondary_structure = motif
746
747 748 -class ProfileHMMSegment(ProfileHMM):
749 """ 750 Represents a segment (fragment) of a ProfileHMM. 751 752 @param hmm: source HMM 753 @type hmm: ProfileHMM 754 @param start: start layer of the segment (rank) 755 @type start: int 756 @param end: end layer of the segment (rank) 757 @type end: int 758 759 @raise ValueError: when start or end positions are out of range 760 """ 761
762 - def __init__(self, hmm, start, end):
763 764 if start < hmm.layers.start_index or start > hmm.layers.last_index: 765 raise IndexError('Start position {0} is out of range'.format(start)) 766 if end < hmm.layers.start_index or end > hmm.layers.last_index: 767 raise IndexError('End position {0} is out of range'.format(end)) 768 769 #hmm = csb.core.deepcopy(hmm) 770 771 super(ProfileHMMSegment, self).__init__(units=hmm.score_units, 772 scale=hmm.scale, logbase=hmm.logbase) 773 self.id = hmm.id 774 self.family = hmm.family 775 self.name = hmm.name 776 self.pseudocounts = hmm.pseudocounts 777 self.evd = hmm.evd 778 self.version = hmm.version 779 self.source = hmm.id 780 self._source_start = start 781 self._source_end = end 782 783 if hmm.alignment: 784 self.alignment = hmm.alignment.hmm_subregion(start, end) 785 self.consensus = hmm.consensus.subregion(start, end) 786 787 layers = csb.core.deepcopy(hmm.layers[start : end + 1]) 788 max_score = 1.0 789 if hmm.score_units != ScoreUnits.Probability: 790 max_score = hmm._convert(hmm.score_units, 791 max_score, hmm.scale, hmm.logbase) 792 self._build_graph(layers, max_score) 793 794 if hmm.dssp: 795 self.dssp = hmm.dssp.subregion(start, end) 796 self._assign_secstructure() 797 if hmm.psipred: 798 self.psipred = hmm.psipred.subregion(start, end) 799 800 self.length.layers = self.layers.length 801 self.length.matches = self.layers.length 802 self.effective_matches = sum([(l.effective_matches or 0.0) for l in self.layers]) / self.layers.length
803 804 @property
805 - def source_start(self):
806 """ 807 Start position of this segment in its source HMM 808 @rtype: int 809 """ 810 return self._source_start
811 812 @property
813 - def source_end(self):
814 """ 815 End position of this segment in its source HMM 816 @rtype: int 817 """ 818 return self._source_end
819
820 - def _build_graph(self, source_layers, max_score):
821 822 for rank, layer in enumerate(source_layers, start=1): 823 824 for atom_kind in layer.residue.atoms: 825 layer.residue.atoms[atom_kind].rank = rank 826 layer.residue._rank = rank 827 layer.rank = rank 828 829 self.layers.append(layer) 830 831 if rank == 1: 832 for state_kind in layer: 833 if state_kind in(States.Match, States.Deletion): 834 start_tran = Transition(self.start, layer[state_kind], max_score) 835 self.start.transitions.append(start_tran) 836 elif rank == len(source_layers): 837 for state_kind in layer: 838 state = layer[state_kind] 839 if not (States.End in state.transitions or States.Match in state.transitions): 840 state.transitions.set({}) 841 else: 842 end_tran = Transition(state, self.end, max_score) 843 state.transitions.set({States.End: end_tran}) # TODO: I->I ?
844
845 846 -class EmissionProfileSegment(ProfileHMMSegment):
847 """ 848 Represents a segment of the Match state emission probabilities of a L{ProfileHMM}. 849 Contains only Match states, connected with equal transition probabilities of 100%. 850 """ 851
852 - def _build_graph(self, source_layers):
853 854 factory = StateFactory() 855 856 for rank, source_layer in enumerate(source_layers, start=1): 857 858 emission = source_layer[States.Match].emission 859 background = source_layer[States.Match].background 860 861 match = factory.create_match(emission, background) 862 match.rank = rank 863 864 layer = HMMLayer(rank, source_layer.residue) 865 layer.append(match) 866 self.layers.append(layer) 867 868 if rank == 1: 869 self.start.transitions.append(Transition(self.start, match, 1.0)) 870 elif rank < len(source_layers): 871 prev_match = self.layers[rank - 1][States.Match] 872 prev_match.transitions.append(Transition(prev_match, match, 1.0)) 873 elif rank == len(source_layers): 874 match.transitions.append(Transition(match, self.end, 1.0)) 875 else: 876 assert False
877
878 879 -class ProfileHMMRegion(ProfileHMM):
880 """ 881 A shallow proxy referring to a sub-region of a given Profile HMM. 882 883 @param hmm: source HMM 884 @type hmm: L{ProfileHMM} 885 @param start: start layer of the segment (rank) 886 @type start: int 887 @param end: end layer of the segment (rank) 888 @type end: int 889 890 @raise ValueError: when start or end positions are out of range 891 """ 892
893 - def __init__(self, hmm, start, end):
894 895 if start < hmm.layers.start_index or start > hmm.layers.last_index: 896 raise IndexError('Start position {0} is out of range'.format(start)) 897 if end < hmm.layers.start_index or end > hmm.layers.last_index: 898 raise IndexError('End position {0} is out of range'.format(end)) 899 if hmm.score_units != ScoreUnits.Probability: 900 raise ValueError('Scores must be converted to probabilities first.') 901 902 self._layers = HMMLayersCollection(hmm.layers[start : end + 1]) 903 self._score_units = hmm.score_units 904 self.id = hmm.id 905 self.name = hmm.name 906 self.family = hmm.family 907 self._source_start = start 908 self._source_end = end
909 910 @property
911 - def source_start(self):
912 """ 913 Start position of this segment in its source HMM 914 @rtype: int 915 """ 916 return self._source_start
917 918 @property
919 - def source_end(self):
920 """ 921 End position of this segment in its source HMM 922 @rtype: int 923 """ 924 return self._source_end
925
926 927 -class ProfileLength(object):
928
929 - def __init__(self, matches, layers):
930 self.matches = matches 931 self.layers = layers
932
933 934 -class EVDParameters(object):
935
936 - def __init__(self, lamda, mu):
937 self.lamda = lamda 938 self.mu = mu
939
940 - def __nonzero__(self):
941 return self.__bool__()
942
943 - def __bool__(self):
944 return (self.lamda is not None or self.mu is not None)
945
946 947 -class EmissionTable(csb.core.DictionaryContainer):
948 """ 949 Represents a lookup table of emission probabilities. Provides dictionary-like 950 access: 951 952 >>> state.emission[ProteinAlphabet.ALA] 953 emission probability for ALA 954 955 @param emission: an initialization dictionary of emission probabilities 956 @type emission: dict 957 @param restrict: a list of residue types allowed for this emission table. 958 Defaults to the members of L{csb.bio.sequence.ProteinAlphabet} 959 @type restrict: list 960 """ 961
962 - def __init__(self, emission=None, restrict=Enum.members(sequence.ProteinAlphabet)):
963 super(EmissionTable, self).__init__(emission, restrict)
964
965 - def append(self, residue, probability):
966 """ 967 Append a new emission probability to the table. 968 969 @param residue: residue name (type) - a member of 970 L{csb.bio.sequence.ProteinAlphabet} 971 @type residue: L{csb.core.EnumItem} 972 @param probability: emission score 973 @type probability: float 974 975 @raise EmissionExistsError: if residue is already defined 976 """ 977 if residue in self: 978 raise EmissionExistsError('Residue {0} is already defined.'.format(residue)) 979 980 super(EmissionTable, self).append(residue, probability)
981
982 - def set(self, table):
983 """ 984 Set the emission table using the dictionary provided in the argument. 985 986 @param table: the new emission table 987 @type table: dict 988 """ 989 super(EmissionTable, self)._set(table)
990
991 - def update(self, residue, probability):
992 """ 993 Update the emission C{probability} of a given emission C{residue}. 994 995 @param residue: name (type) of the residue to be updated 996 @type residue: L{csb.core.EnumItem} 997 @param probability: new emission score 998 @type probability: float 999 """ 1000 super(EmissionTable, self)._update({residue: probability})
1001
1002 1003 -class TransitionTable(csb.core.DictionaryContainer):
1004 """ 1005 Represents a lookup table of transitions that are possible from within a given state. 1006 1007 Provides dictionary-like access, where dictionary keys are target states. 1008 These are members of the L{States} enumeration, e.g.: 1009 1010 >>> state.transitions[States.Match] 1011 transition info regarding transition from the current state to a Match state 1012 >>> state.transitions[States.Match].predecessor 1013 state 1014 >>> state.transitions[States.Match].successor 1015 the next match state 1016 1017 @param transitions: an initialization dictionary of target L{State}:L{Transition} pairs 1018 @type transitions: dict 1019 @param restrict: a list of target states allowed for this transition table. 1020 Defaults to the L{States} enum members 1021 @type restrict: list 1022 """ 1023
1024 - def __init__(self, transitions=None, restrict=Enum.members(States)):
1025 super(TransitionTable, self).__init__(transitions, restrict) 1026 1027 @property
1028 - def _exception(self):
1029 return TransitionNotFoundError
1030
1031 - def append(self, transition):
1032 """ 1033 Append a new C{transition} to the table. 1034 1035 @param transition: transition info 1036 @type transition: L{Transition} 1037 1038 @raise TransitionExistsError: when a transition to the same target state 1039 already exists for the current state 1040 """ 1041 1042 if transition.successor.type in self: 1043 msg = 'Transition to a {0} state is already defined.' 1044 raise TransitionExistsError(msg.format(transition.successor.type)) 1045 1046 super(TransitionTable, self).append(transition.successor.type, transition)
1047
1048 - def set(self, table):
1049 """ 1050 Set the transition table using the dictionary provided in the argument. 1051 1052 @param table: the new transition table 1053 @type table: dict 1054 """ 1055 super(TransitionTable, self)._set(table)
1056
1057 - def update(self, target_statekind, transition):
1058 """ 1059 Update the information of a transition, which points to a target 1060 state of the specified L{States} kind. 1061 1062 @param target_statekind: the key of the transition to be updated 1063 @type target_statekind: L{csb.core.EnumItem} 1064 @param transition: new transition info object 1065 @type transition: L{Transition} 1066 1067 @raise ValueError: if I{transition.successor.type} differs from 1068 C{target_statekind} 1069 """ 1070 if transition.successor.type != target_statekind: 1071 raise ValueError("Successor's type differs from the specified target state.") 1072 1073 super(TransitionTable, self)._update({target_statekind: transition})
1074
1075 1076 -class HMMLayersCollection(csb.core.CollectionContainer):
1077 """ 1078 Provides consecutive, 1-based access to all of the layers in the profile. 1079 Each profile layer contains a catalog of available states at that index, e.g.: 1080 1081 >>> profile.layers[i] 1082 the catalog at profile layer i 1083 >>> profile.layers[i][States.Deletion] 1084 the deletion state at index i 1085 1086 @param layers: initialization list of L{HMMLayer}s 1087 @type layers: list 1088 """
1089 - def __init__(self, layers=None):
1091 1092 @property
1093 - def _exception(self):
1094 return LayerIndexError
1095
1096 -class HMMLayer(csb.core.DictionaryContainer):
1097 """ 1098 Provides a dictionary-like catalog of the available states at this layer. 1099 Lookup keys are members of the L{States} enumeration, e.g.: 1100 1101 >>> profile.layers[i][States.Deletion] 1102 the deletion state at layer number i 1103 1104 @param rank: layer's number 1105 @type rank: int 1106 @param residue: a representative L{ProteinResidue} that is associated with 1107 this layer 1108 @type residue: L{ProteinResidue} 1109 @param states: initialization dictionary of L{States}.Item:L{State} pairs 1110 @type states: dict 1111 """
1112 - def __init__(self, rank, residue, states=None):
1113 1114 super(HMMLayer, self).__init__(states, restrict=Enum.members(States)) 1115 1116 self._rank = int(rank) 1117 self._residue = None 1118 self._effective_matches = None 1119 self._effective_insertions = None 1120 self._effective_deletions = None 1121 1122 self.residue = residue
1123 1124 @property
1125 - def _exception(self):
1126 return StateNotFoundError
1127 1128 @property
1129 - def rank(self):
1130 """ 1131 Layer's position 1132 @rtype: int 1133 """ 1134 return self._rank
1135 @rank.setter
1136 - def rank(self, value):
1137 self._rank = int(value)
1138 1139 @property
1140 - def residue(self):
1141 """ 1142 Representative residue 1143 @rtype: L{Residue} 1144 """ 1145 return self._residue
1146 @residue.setter
1147 - def residue(self, residue):
1148 if residue and residue.type == sequence.SequenceAlphabets.Protein.GAP: 1149 raise HMMArgumentError('HMM match states cannot be gaps') 1150 self._residue = residue
1151 1152 @property
1153 - def effective_matches(self):
1154 """ 1155 Number of effective matches at this layer 1156 @rtype: int 1157 """ 1158 return self._effective_matches
1159 @effective_matches.setter
1160 - def effective_matches(self, value):
1161 self._effective_matches = value
1162 1163 @property
1164 - def effective_insertions(self):
1165 """ 1166 Number of effective insertions at this layer 1167 @rtype: int 1168 """ 1169 return self._effective_insertions
1170 @effective_insertions.setter
1171 - def effective_insertions(self, value):
1172 self._effective_insertions = value
1173 1174 @property
1175 - def effective_deletions(self):
1176 """ 1177 Number of effective deletions at this layer 1178 @rtype: int 1179 """ 1180 return self._effective_deletions
1181 @effective_deletions.setter
1182 - def effective_deletions(self, value):
1183 self._effective_deletions = value
1184
1185 - def append(self, state):
1186 """ 1187 Append a new C{state} to the catalog. 1188 1189 @param state: the new state 1190 @type state: L{State} 1191 1192 @raise StateExistsError: when a state of the same type is already defined 1193 """ 1194 if state.type in self: 1195 raise StateExistsError( 1196 'State {0} is already defined at this position.'.format(state.type)) 1197 1198 super(HMMLayer, self).append(state.type, state)
1199
1200 - def update(self, state_kind, state):
1201 """ 1202 Update the sate of the specified kind under the current layer. 1203 1204 @param state_kind: state type (key) - a member of L{States} 1205 @type state_kind: L{csb.core.EnumItem} 1206 @param state: the new state info 1207 @type state: L{State} 1208 1209 @raise ValueError: if state.type differs from state_kind 1210 """ 1211 if state.type != state_kind: 1212 raise ValueError("State's type differs from the specified state_kind") 1213 1214 super(HMMLayer, self)._update({state_kind: state})
1215
1216 1217 -class State(object):
1218 """ 1219 Describes a Hidden Markov Model state. 1220 1221 @param type: one of the L{States} enumeration values, e.g. States.Match 1222 @type type: L{csb.core.EnumItem} 1223 @param emit: a collection of emittable state names allowed for the state, 1224 e.g. the members of I{SequenceAlphabets.Protein}. If not defined, 1225 the state will be created as a silent (unobservable). 1226 @type emit: list 1227 1228 @raise ValueError: if type is not a member of the States enum 1229 """ 1230
1231 - def __init__(self, type, emit=None):
1232 1233 self._type = None 1234 self._rank = None 1235 self._transitions = TransitionTable() 1236 self._emission = None 1237 self._background = None 1238 1239 self.type = type 1240 1241 if emit is not None: 1242 self._emission = EmissionTable(restrict=emit) 1243 self._background = EmissionTable(restrict=emit)
1244
1245 - def __repr__(self):
1246 return "<HMM {0.type!r} State>".format(self)
1247 1248 @property
1249 - def type(self):
1250 """ 1251 State type: one of the L{States} 1252 @rtype: enum item 1253 """ 1254 return self._type
1255 @type.setter
1256 - def type(self, value):
1257 if value.enum is not States: 1258 raise TypeError(value) 1259 self._type = value
1260 1261 @property
1262 - def rank(self):
1263 return self._rank
1264 @rank.setter
1265 - def rank(self, value):
1266 self._rank = int(value)
1267 1268 @property
1269 - def transitions(self):
1270 """ 1271 Lookup table with available transitions to other states 1272 @rtype: L{TransitionTable} 1273 """ 1274 return self._transitions
1275 1276 @property
1277 - def emission(self):
1278 """ 1279 Lookup table with available emission probabilities 1280 @rtype: L{EmissionTable} 1281 """ 1282 if self._emission is None: 1283 raise UnobservableStateError('Silent {0!r} state'.format(self.type)) 1284 return self._emission
1285 1286 @property
1287 - def background(self):
1288 """ 1289 Lookup table with background probabilities 1290 @rtype: L{EmissionTable} 1291 """ 1292 return self._background
1293 1294 @property
1295 - def silent(self):
1296 """ 1297 Whether this state can emit something 1298 @rtype: bool 1299 """ 1300 try: 1301 return self.emission is None 1302 except UnobservableStateError: 1303 return True
1304
1305 -class StateFactory(object):
1306 """ 1307 Simplifies the construction of protein profile HMM states. 1308 """ 1309
1310 - def __init__(self):
1312
1313 - def create_match(self, emission, background):
1314 1315 state = State(States.Match, emit=self._aa) 1316 state.emission.set(emission) 1317 state.background.set(background) 1318 return state
1319
1320 - def create_insertion(self, background):
1321 1322 state = State(States.Insertion, emit=self._aa) 1323 state.emission.set(background) 1324 state.background.set(background) 1325 return state
1326
1327 - def create_deletion(self):
1328 return State(States.Deletion)
1329
1330 1331 -class TransitionType(object):
1332
1333 - def __init__(self, source, target):
1334 self.source_state = source.type 1335 self.target_state = target.type
1336
1337 - def __repr__(self):
1338 return '{0}->{1}'.format(self.source_state, self.target_state)
1339
1340 -class Transition(object):
1341 """ 1342 Describes a Hidden Markov Model transition between two states. 1343 1344 @param predecessor: source state 1345 @type predecessor: L{State} 1346 @param successor: target state 1347 @type successor: L{State} 1348 @param probability: transition score 1349 @type probability: float 1350 """ 1351
1352 - def __init__(self, predecessor, successor, probability):
1353 1354 if not (isinstance(predecessor, State) or isinstance(successor, State)): 1355 raise TypeError('Predecessor and successor must be State instances.') 1356 1357 self._predecessor = predecessor 1358 self._successor = successor 1359 self._probability = None 1360 self._type = TransitionType(predecessor, successor) 1361 1362 self.probability = probability
1363
1364 - def __str__(self):
1365 return '<HMM Transition: {0.type} {0.probability}>'.format(self)
1366 1367 @property
1368 - def predecessor(self):
1369 """ 1370 Transition source state 1371 @rtype: L{State} 1372 """ 1373 return self._predecessor
1374 1375 @property
1376 - def successor(self):
1377 """ 1378 Transition target state 1379 @rtype: L{State} 1380 """ 1381 return self._successor
1382 1383 @property
1384 - def probability(self):
1385 """ 1386 Transition score 1387 @rtype: float 1388 """ 1389 return self._probability
1390 @probability.setter
1391 - def probability(self, value):
1392 if not (value >=0): 1393 raise ValueError('Transition probability must be a positive number.') 1394 self._probability = float(value)
1395 1396 @property
1397 - def type(self):
1398 """ 1399 Struct, containing information about the source and target state types 1400 @rtype: L{TransitionType} 1401 """ 1402 return self._type
1403
1404 1405 -class HHpredHitAlignment(sequence.SequenceAlignment):
1406 """ 1407 Represents a query-template alignment in an HHpred result. 1408 1409 @param hit: relevant hit object 1410 @type param: L{HHpredHit} 1411 @param query: the query sequence in the alignment region, with gaps 1412 @type query: str 1413 @param subject: the subject sequence in the alignment region, with gaps 1414 @type subject: str 1415 """ 1416 1417 GAP = sequence.ProteinAlphabet.GAP 1418
1419 - def __init__(self, hit, query, subject):
1420 1421 if not isinstance(hit, HHpredHit): 1422 raise TypeError(hit) 1423 1424 self._hit = hit 1425 1426 q = sequence.Sequence('query', '', ''.join(query), type=sequence.SequenceTypes.Protein) 1427 s = sequence.Sequence(hit.id, '', ''.join(subject), type=sequence.SequenceTypes.Protein) 1428 1429 super(HHpredHitAlignment, self).__init__((q, s))
1430 1431 @property
1432 - def query(self):
1433 """ 1434 Query sequence (with gaps) 1435 @rtype: str 1436 """ 1437 return self.rows[1].sequence
1438 1439 @property
1440 - def subject(self):
1441 """ 1442 Subject sequence (with gaps) 1443 @rtype: str 1444 """ 1445 return self.rows[2].sequence
1446 1447 @property
1448 - def segments(self):
1449 """ 1450 Find all ungapped query-subject segments in the alignment. 1451 Return a generator over all ungapped alignment segments, represented 1452 by L{HHpredHit} objects 1453 1454 @rtype: generator 1455 """ 1456 1457 def make_segment(sstart, send, qstart, qend): 1458 1459 seg = HHpredHit(self._hit.rank, self._hit.id, sstart, send, 1460 qstart, qend, self._hit.probability, self._hit.qlength) 1461 1462 seg.slength = self._hit.slength 1463 seg.evalue = self._hit.evalue 1464 seg.pvalue = self._hit.pvalue 1465 seg.score = self._hit.score 1466 seg.ss_score = self._hit.ss_score 1467 seg.identity = self._hit.identity 1468 seg.similarity = self._hit.similarity 1469 seg.prob_sum = self._hit.prob_sum 1470 1471 return seg
1472 1473 in_segment = False 1474 qs = self._hit.qstart - 1 1475 ss = self._hit.start - 1 1476 qi, si = qs, ss 1477 qe, se = qs, ss 1478 1479 for q, s in zip(self.query, self.subject): 1480 1481 if q != HHpredHitAlignment.GAP: 1482 qi += 1 1483 if s != HHpredHitAlignment.GAP: 1484 si += 1 1485 1486 if HHpredHitAlignment.GAP in (q, s): 1487 if in_segment: 1488 yield make_segment(ss, se, qs, qe) 1489 in_segment = False 1490 qs, ss = 0, 0 1491 qe, se = 0, 0 1492 else: 1493 if not in_segment: 1494 in_segment = True 1495 qs, ss = qi, si 1496 1497 qe, se = qi, si 1498 1499 if in_segment: 1500 yield make_segment(ss, se, qs, qe)
1501
1502 - def to_a3m(self):
1503 """ 1504 @return: a query-centric A3M alignment. 1505 @rtype: L{csb.bio.sequence.A3MAlignment} 1506 """ 1507 a3m = self.format(sequence.AlignmentFormats.A3M) 1508 return sequence.A3MAlignment.parse(a3m, strict=False)
1509
1510 -class HHpredHit(object):
1511 """ 1512 Represents a single HHsearch hit. 1513 1514 @param rank: rank of the hit 1515 @type rank: int 1516 @param id: id of the hit 1517 @type id: str 1518 @param start: subject start 1519 @type start: int 1520 @param end: subject end 1521 @type end: int 1522 @param qstart: query start 1523 @type qstart: int 1524 @param qend: query end 1525 @type qend: int 1526 @param probability: probability of the hit 1527 @type probability: float 1528 @param qlength: length of the query 1529 @type qlength: int 1530 """ 1531
1532 - def __init__(self, rank, id, start, end, qstart, qend, probability, 1533 qlength):
1534 1535 self._rank = None 1536 self._id = None 1537 self._start = None 1538 self._end = None 1539 self._qstart = None 1540 self._qend = None 1541 self._probability = None 1542 self._qlength = None 1543 self._alignment = None 1544 1545 self._slength = None 1546 self._evalue = None 1547 self._pvalue = None 1548 self._score = None 1549 self._ss_score = None 1550 self._identity = None 1551 self._similarity = None 1552 self._prob_sum = None 1553 1554 # managed properties 1555 self.rank = rank 1556 self.id = id 1557 self.start = start 1558 self.end = end 1559 self.qstart = qstart 1560 self.qend = qend 1561 self.probability = probability 1562 self.qlength = qlength
1563
1564 - def __str__(self):
1565 return "{0.id} {0.probability} {0.start}-{0.end}".format(self)
1566
1567 - def __repr__(self):
1568 return "<HHpredHit: {0!s}>".format(self)
1569
1570 - def __lt__(self, other):
1571 return self.rank < other.rank
1572
1573 - def equals(self, other):
1574 """ 1575 Return True if C{self} is completely identical to C{other} (same id, same start 1576 and end positions). 1577 1578 @param other: right-hand-term 1579 @type other: HHpredHit 1580 1581 @rtype: bool 1582 """ 1583 return (self.id == other.id and self.start == other.start and self.end == other.end)
1584
1585 - def surpasses(self, other):
1586 """ 1587 Return True if C{self} is a superior to C{other} in terms of length 1588 and probability. These criteria are applied in the following order: 1589 1590 1. Length (the longer hit is better) 1591 2. Probability (if they have the same length, the one with the higher 1592 probability is better) 1593 3. Address (if they have the same length and probability, the one with 1594 higher memory ID wins; for purely practical reasons) 1595 1596 @param other: right-hand-term 1597 @type other: HHpredHit 1598 1599 @rtype: bool 1600 """ 1601 if self.length > other.length: 1602 return True 1603 elif self.length == other.length: 1604 if self.probability > other.probability: 1605 return True 1606 elif self.probability == other.probability: 1607 if id(self) > id(other): 1608 return True 1609 return False
1610
1611 - def includes(self, other, tolerance=1):
1612 """ 1613 Return True if C{other} overlaps with C{self}, that means C{other} 1614 is fully or partially included in C{self} when aligned over the query. 1615 1616 @param other: right-hand-term 1617 @type other: HHpredHit 1618 @param tolerance: allow partial overlaps for that number of residues at 1619 either end 1620 @type tolerance: int 1621 1622 @rtype: bool 1623 """ 1624 if self.id == other.id: 1625 if other.start >= self.start: 1626 if (other.end - self.end) <= tolerance: 1627 return True 1628 elif other.end <= self.end: 1629 if (self.start - other.start) <= tolerance: 1630 return True 1631 1632 return False
1633
1634 - def add_alignment(self, query, subject):
1635 """ 1636 Add query/subject alignment to the hit. 1637 1638 @param query: the query sequence within the alignment region, with gaps 1639 @type query: str 1640 @param subject: the subject sequence within the alignment region, with 1641 gaps 1642 @type subject: str 1643 """ 1644 self._alignment = HHpredHitAlignment(self, query, subject)
1645 1646 @property
1647 - def rank(self):
1648 return self._rank
1649 @rank.setter
1650 - def rank(self, value):
1651 try: 1652 value = int(value) 1653 except: 1654 raise TypeError('rank must be int, not {1}'.format(type(value))) 1655 self._rank = value
1656 1657 @property
1658 - def id(self):
1659 return self._id
1660 @id.setter
1661 - def id(self, value):
1662 try: 1663 value = str(value) 1664 except: 1665 raise TypeError('id must be string, not {0}'.format(type(value))) 1666 self._id = value
1667 1668 @property
1669 - def start(self):
1670 return self._start
1671 @start.setter
1672 - def start(self, value):
1673 try: 1674 value = int(value) 1675 except: 1676 raise TypeError('start must be int, not {0}'.format(type(value))) 1677 self._start = value
1678 1679 @property
1680 - def end(self):
1681 return self._end
1682 @end.setter
1683 - def end(self, value):
1684 try: 1685 value = int(value) 1686 except: 1687 raise TypeError('end must be int, not {0}'.format(type(value))) 1688 self._end = value
1689 1690 @property
1691 - def qstart(self):
1692 return self._qstart
1693 @qstart.setter
1694 - def qstart(self, value):
1695 try: 1696 value = int(value) 1697 except: 1698 raise TypeError('qstart must be int, not {0}'.format(type(value))) 1699 self._qstart = value
1700 1701 @property
1702 - def qend(self):
1703 return self._qend
1704 @qend.setter
1705 - def qend(self, value):
1706 try: 1707 value = int(value) 1708 except: 1709 raise TypeError('qend must be int, not {0}'.format(type(value))) 1710 self._qend = value
1711 1712 @property
1713 - def qlength(self):
1714 return self._qlength
1715 @qlength.setter
1716 - def qlength(self, value):
1717 try: 1718 value = int(value) 1719 except: 1720 raise TypeError('qlength must be int, not {0}'.format(type(value))) 1721 self._qlength = value
1722 1723 @property
1724 - def probability(self):
1725 return self._probability
1726 @probability.setter
1727 - def probability(self, value):
1728 try: 1729 value = float(value) 1730 except: 1731 raise TypeError('probability must be float, not {0}'.format(type(value))) 1732 self._probability = value
1733 1734 @property
1735 - def alignment(self):
1736 return self._alignment
1737 1738 @property
1739 - def length(self):
1740 try: 1741 return self.end - self.start + 1 1742 except: 1743 return 0
1744 1745 @property
1746 - def slength(self):
1747 return self._slength
1748 @slength.setter
1749 - def slength(self, value):
1750 self._slength = value
1751 1752 @property
1753 - def evalue(self):
1754 return self._evalue
1755 @evalue.setter
1756 - def evalue(self, value):
1757 self._evalue = value
1758 1759 @property
1760 - def pvalue(self):
1761 return self._pvalue
1762 @pvalue.setter
1763 - def pvalue(self, value):
1764 self._pvalue = value
1765 1766 @property
1767 - def score(self):
1768 return self._score
1769 @score.setter
1770 - def score(self, value):
1771 self._score = value
1772 1773 @property
1774 - def ss_score(self):
1775 return self._ss_score
1776 @ss_score.setter
1777 - def ss_score(self, value):
1778 self._ss_score = value
1779 1780 @property
1781 - def identity(self):
1782 return self._identity
1783 @identity.setter
1784 - def identity(self, value):
1785 self._identity = value
1786 1787 @property
1788 - def similarity(self):
1789 return self._similarity
1790 @similarity.setter
1791 - def similarity(self, value):
1792 self._similarity = value
1793 1794 @property
1795 - def prob_sum(self):
1796 return self._prob_sum
1797 @prob_sum.setter
1798 - def prob_sum(self, value):
1799 self._prob_sum = value
1800
1801 1802 -class HHpredHitList(object):
1803 """ 1804 Represents a collection of L{HHpredHit}s. 1805 """ 1806
1807 - def __init__(self, hits, query_name='', match_columns=-1, no_of_seqs='', 1808 neff=-1., searched_hmms=-1, date='', command=''):
1809 self._hits = list(hits) 1810 1811 self._query_name = None 1812 self._match_columns = None 1813 self._no_of_seqs = None 1814 self._neff = None 1815 self._searched_hmms = None 1816 self._date = None 1817 self._command = None 1818 1819 self.query_name = query_name 1820 self.match_columns = match_columns 1821 self.no_of_seqs = no_of_seqs 1822 self.neff = neff 1823 self.searched_hmms = searched_hmms 1824 self.date = date 1825 self.command = command
1826 1827 @property
1828 - def query_name(self):
1829 return self._query_name
1830 @query_name.setter
1831 - def query_name(self, value):
1832 self._query_name = value
1833 1834 @property
1835 - def match_columns(self):
1836 return self._match_columns
1837 @match_columns.setter
1838 - def match_columns(self, value):
1839 self._match_columns = value
1840 1841 @property
1842 - def no_of_seqs(self):
1843 return self._no_of_seqs
1844 @no_of_seqs.setter
1845 - def no_of_seqs(self, value):
1846 self._no_of_seqs = value
1847 1848 @property
1849 - def neff(self):
1850 return self._neff
1851 @neff.setter
1852 - def neff(self, value):
1853 self._neff = value
1854 1855 @property
1856 - def searched_hmms(self):
1857 return self._searched_hmms
1858 @searched_hmms.setter
1859 - def searched_hmms(self, value):
1860 self._searched_hmms = value
1861 1862 @property
1863 - def date(self):
1864 return self._date
1865 @date.setter
1866 - def date(self, value):
1867 self._date = value
1868 1869 @property
1870 - def command(self):
1871 return self._command
1872 @command.setter
1873 - def command(self, value):
1874 self._command = value
1875
1876 - def __str__(self):
1877 return "HHpredHitList\n\tquery={0.query_name}\n\tmatch_columns={0.match_columns}\n\tno_of_seqs={0.no_of_seqs}\n\tneff={0.neff}\n\tsearched_hmms={0.searched_hmms}\n\tdate={0.date}\n\tcommand={0.command}".format(self)
1878
1879 - def __repr__(self):
1880 return "<HHpredHitList: {0} hits>".format(len(self))
1881
1882 - def __getitem__(self, index):
1883 return self._hits[index]
1884
1885 - def __iter__(self):
1886 return iter(self._hits)
1887
1888 - def __len__(self):
1889 return len(self._hits)
1890
1891 - def sort(self):
1892 self._hits.sort(key=lambda i: i.rank)
1893