Package csb :: Package bio :: Package structure
[frames | no frames]

Source Code for Package csb.bio.structure

   1  """ 
   2  3D and secondary structure APIs. 
   3   
   4  This module defines some of the most fundamental abstractions in the library: 
   5  L{Structure}, L{Chain}, L{Residue} and L{Atom}. Instances of these objects may 
   6  exist independently and that is perfectly fine, but usually they are part of a 
   7  Composite aggregation. The root node in this Composite is a L{Structure} (or 
   8  L{Ensemble}). L{Structure}s are composed of L{Chain}s, and each L{Chain} is a 
   9  collection of L{Residue}s. The leaf node is L{Atom}.  
  10   
  11  All of these objects implement the base L{AbstractEntity} interface. Therefore, 
  12  every node in the Composite can be transformed: 
  13       
  14      >>> r, t = [rotation matrix], [translation vector] 
  15      >>> entity.transform(r, t) 
  16       
  17  and it knows its immediate children: 
  18   
  19      >>> entity.items 
  20      <iterator>    # over all immediate child entities 
  21       
  22  If you want to traverse the complete Composite tree, starting at arbitrary level, 
  23  and down to the lowest level, use one of the L{CompositeEntityIterator}s. Or just 
  24  call L{AbstractEntity.components}: 
  25   
  26      >>> entity.components() 
  27      <iterator>   # over all descendants, of any type, at any level 
  28      >>> entity.components(klass=Residue) 
  29      <iterator>   # over all Residue descendants 
  30       
  31  Some of the inner objects in this hierarchy behave just like dictionaries 
  32  (but are not): 
  33   
  34      >>> structure.chains['A']       # access chain A by ID 
  35      <Chain A: Protein> 
  36      >>> structure['A']              # the same 
  37      <Chain A: Protein> 
  38      >>> residue.atoms['CA']           
  39      <Atom: CA>                      # access an atom by its name 
  40      >>> residue['CA']                 
  41      <Atom: CA>                      # the same 
  42           
  43  Others behave like list collections: 
  44   
  45      >>> chain.residues[10]               # 1-based access to the residues in the chain 
  46      <ProteinResidue [10]: PRO 10> 
  47      >>> chain[10]                        # 0-based, list-like access 
  48      <ProteinResidue [11]: GLY 11> 
  49       
  50  Step-wise building of L{Ensemble}s, L{Chain}s and L{Residue}s is supported through 
  51  a number of C{append} methods, for example: 
  52   
  53      >>> residue = ProteinResidue(401, ProteinAlphabet.ALA) 
  54      >>> s.chains['A'].residues.append(residue) 
  55       
  56  See L{EnsembleModelsCollection}, L{StructureChainsTable}, L{ChainResiduesCollection} 
  57  and L{ResidueAtomsTable} for more details. 
  58   
  59  Some other objects in this module of potential interest are the self-explanatory 
  60  L{SecondaryStructure} and L{TorsionAngles}.      
  61  """ 
  62   
  63  import os 
  64  import re 
  65  import copy 
  66  import math 
  67  import numpy 
  68   
  69  import csb.io 
  70  import csb.core 
  71  import csb.numeric 
  72  import csb.bio.utils 
  73   
  74  from abc import ABCMeta, abstractmethod, abstractproperty 
  75   
  76  from csb.bio.sequence import SequenceTypes, SequenceAlphabets, AlignmentTypes 
class AngleUnits(csb.core.enum):
    """
    Units in which torsion angles can be expressed.
    """
    Degrees = 'deg'
    Radians = 'rad'
84
class SecStructures(csb.core.enum):
    """
    Secondary structure element types; each member's value is its
    one-letter code.
    """
    Helix = 'H'
    Strand = 'E'
    Coil = 'C'
    Turn = 'T'
    Bend = 'S'
    Helix3 = 'G'
    PiHelix = 'I'
    BetaBridge = 'B'
    Gap = '-'
91
class ChemElements(csb.core.enum):
    """
    Periodic table elements
    """
    # Members are named by element symbol; the integer values coincide with
    # the atomic numbers. The 57-71 and 89-103 ranges are declared after the
    # main sequence, so declaration order is not strictly ascending.
    # NOTE(review): x=-1 is presumably a placeholder for an unknown element
    # -- confirm against the callers.
    H=1; He=2; Li=3; Be=4; B=5; C=6; N=7; O=8; F=9; Ne=10; Na=11; Mg=12; Al=13; Si=14; P=15; 
    S=16; Cl=17; Ar=18; K=19; Ca=20; Sc=21; Ti=22; V=23; Cr=24; Mn=25; Fe=26; Co=27; Ni=28; 
    Cu=29; Zn=30; Ga=31; Ge=32; As=33; Se=34; Br=35; Kr=36; Rb=37; Sr=38; Y=39; Zr=40; Nb=41; 
    Mo=42; Tc=43; Ru=44; Rh=45; Pd=46; Ag=47; Cd=48; In=49; Sn=50; Sb=51; Te=52; I=53; Xe=54;
    Cs=55; Ba=56; Hf=72; Ta=73; W=74; Re=75; Os=76; Ir=77; Pt=78; Au=79; Hg=80; Tl=81; Pb=82; 
    Bi=83; Po=84; At=85; Rn=86; Fr=87; Ra=88; Rf=104; Db=105; Sg=106; Bh=107; Hs=108; Mt=109; 
    Ds=110; Rg=111; La=57; Ce=58; Pr=59; Nd=60; Pm=61; Sm=62; Eu=63; Gd=64; Tb=65; Dy=66; 
    Ho=67; Er=68; Tm=69; Yb=70; Lu=71; Ac=89; Th=90; Pa=91; U=92; Np=93; Pu=94; Am=95; Cm=96; 
    Bk=97; Cf=98; Es=99; Fm=100; Md=101; No=102; Lr=103; x=-1
105
class Broken3DStructureError(ValueError):
    """
    Raised when a required atom cannot be retrieved from a structure
    (see L{AbstractEntity.get_coordinates}).
    """
109
class Missing3DStructureError(Broken3DStructureError):
    """
    Specialization of L{Broken3DStructureError}; documented by
    L{Chain.compute_torsion} as raised when a 3D structure is absent.
    """
112
class InvalidOperation(Exception):
    """
    Raised when an operation is not permitted in the current state of an
    entity (e.g. dumping an empty L{Ensemble}).
    """
115
class EntityNotFoundError(csb.core.ItemNotFoundError):
    """
    Base class of L{ChainNotFoundError} and L{AtomNotFoundError}.
    """
118
class ChainNotFoundError(EntityNotFoundError):
    """
    Lookup error type configured for L{StructureChainsTable}.
    """
121
class AtomNotFoundError(EntityNotFoundError):
    """
    Atom-specific L{EntityNotFoundError}.
    """
124
class EntityIndexError(csb.core.CollectionIndexError):
    """
    Index error type configured for L{EnsembleModelsCollection} and
    L{ChainResiduesCollection}.
    """
127
class DuplicateModelIDError(csb.core.DuplicateKeyError):
    """
    Raised by L{EnsembleModelsCollection.append} for an already used model ID.
    """
130
class DuplicateChainIDError(csb.core.DuplicateKeyError):
    """
    Raised by L{StructureChainsTable} when a chain ID is already defined.
    """
133
class DuplicateResidueIDError(csb.core.DuplicateKeyError):
    """
    Raised by L{ChainResiduesCollection.append} when a residue with the same
    ID already exists within the chain.
    """
136
class DuplicateAtomIDError(csb.core.DuplicateKeyError):
    """
    Atom-specific duplicate key error.
    """
139
class AlignmentArgumentLengthError(ValueError):
    """
    Raised by L{Chain.superimpose} when the two chains differ in length
    or are empty.
    """
142
class BrokenSecStructureError(ValueError):
    """
    Base class for secondary structure related errors.
    """
145
class UnknownSecStructureError(BrokenSecStructureError):
    """
    Specialization of L{BrokenSecStructureError}.
    """
148
class AbstractEntity(object):
    """
    Common base of all protein structure entities.

    Declares the uniform interface shared by every node of the Composite
    (e.g. L{Structure}, L{Chain}, L{Residue}).
    """

    __metaclass__ = ABCMeta

    @abstractproperty
    def items(self):
        """
        Iterator over the immediate children of this entity
        @rtype: iterator of L{AbstractEntity}
        """
        pass

    def components(self, klass=None):
        """
        Iterate over the descendants of this entity.

        @param klass: yield only entities of the specified L{AbstractEntity}
                      subclass. If None, the whole hierarchy is traversed down
                      to the lowest level and every node is yielded.
        @type klass: class

        @rtype: iterator of L{AbstractEntity}
        """
        descendants = CompositeEntityIterator.create(self, klass)

        if klass is None:
            for entity in descendants:
                yield entity
        else:
            for entity in descendants:
                if isinstance(entity, klass):
                    yield entity

    def transform(self, rotation, translation):
        """
        Apply a rotation matrix and a translation vector in place, by
        delegating to every immediate child.

        @type rotation: numpy array
        @type translation: numpy array
        """
        for child in self.items:
            child.transform(rotation, translation)

    def get_coordinates(self, what=None, skip=False):
        """
        Collect the coordinate vectors of the requested kinds of atoms from
        all L{Residue} descendants.

        @param what: a list of atom kinds, e.g. ['N', 'CA', 'C']. If empty or
                     None, all atoms of each residue are taken.
        @type what: list or None
        @param skip: if True, silently skip atoms which cannot be retrieved
                     instead of raising
        @type skip: bool

        @return: an array with one entry (the atom's vector) per retrieved atom
        @rtype: numpy array

        @raise Broken3DStructureError: if a specific atom kind cannot be
                                       retrieved from a residue and C{skip}
                                       is False
        """
        vectors = []

        for residue in self.components(klass=Residue):
            atom_kinds = what if what else residue.atoms

            for kind in atom_kinds:
                try:
                    vectors.append(residue.atoms[kind].vector)
                except csb.core.ItemNotFoundError:
                    if not skip:
                        raise Broken3DStructureError('Could not retrieve {0} atom from the structure'.format(kind))

        return numpy.array(vectors)
215
class CompositeEntityIterator(object):
    """
    Iterator over a composite L{AbstractEntity} hierarchy. The traversal
    is driven by a stack holding the C{items} iterator of each node
    that has been visited.

    @param node: root entity to traverse
    @type node: L{AbstractEntity}

    @raise TypeError: if C{node} is not an L{AbstractEntity}
    """

    def __init__(self, node):

        if not isinstance(node, AbstractEntity):
            raise TypeError(node)

        self._node = node
        self._stack = csb.core.Stack()

        self._inspect(node)

    def __iter__(self):
        return self

    def __next__(self):
        return self.next()

    def next(self):
        """
        Return the next node in the traversal.

        @raise StopIteration: when all child iterators have been exhausted
        """
        while not self._stack.empty():
            try:
                # advance the iterator on top of the stack; descend into the
                # node it yields before handing that node to the caller
                child = next(self._stack.peek())
                self._inspect(child)
                return child

            except StopIteration:
                # the topmost iterator is exhausted: resume with its parent
                self._stack.pop()

        raise StopIteration()

    def _inspect(self, node):
        """
        Push the iterator over C{node}'s children to the stack.
        """
        self._stack.push(node.items)

    @staticmethod
    def create(node, leaf=None):
        """
        Factory for composite iterators.

        @param node: root entity to traverse
        @type node: L{AbstractEntity}
        @param leaf: if not None, return a L{ConfinedEntityIterator} which
                     does not descend below nodes of this type
        @type leaf: class
        @rtype: L{CompositeEntityIterator}
        """
        if leaf is not None:
            return ConfinedEntityIterator(node, leaf)

        return CompositeEntityIterator(node)
274
class ConfinedEntityIterator(CompositeEntityIterator):
    """
    Composite iterator which stops descending into a branch as soon as
    a node of a given type is reached (that node itself is still returned).

    @param node: root entity to traverse
    @type node: L{AbstractEntity}
    @param leaf: traverse the hierarchy down to the specified L{AbstractEntity}
    @type leaf: class

    @raise TypeError: if C{leaf} is not a subclass of L{AbstractEntity}
    """

    def __init__(self, node, leaf):

        if not issubclass(leaf, AbstractEntity):
            raise TypeError(leaf)

        # must be set before the base initializer, which calls _inspect()
        self._leaf = leaf
        super(ConfinedEntityIterator, self).__init__(node)

    def _inspect(self, node):
        """
        Push C{node}'s children to the stack, unless C{node} is a leaf.
        """
        if isinstance(node, self._leaf):
            return

        self._stack.push(node.items)
297
class Ensemble(csb.core.AbstractNIContainer, AbstractEntity):
    """
    An ensemble of multiple L{Structure} models with list-like access
    to them:

        >>> ensemble[0]
        <Structure Model 1: accn, x chains>
        >>> ensemble.models[1]
        <Structure Model 1: accn, x chains>
    """

    def __init__(self):
        self._models = EnsembleModelsCollection()

    def __repr__(self):
        return "<Ensemble: {0} models>".format(self.models.length)

    @property
    def _children(self):
        return self._models

    @property
    def models(self):
        """
        The collection of models in this ensemble
        @rtype: L{EnsembleModelsCollection}
        """
        return self._models

    @property
    def items(self):
        return iter(self._models)

    @property
    def first_model(self):
        """
        The first L{Structure} in the ensemble (if available)
        @rtype: L{Structure} or None
        """
        return self[0] if len(self._models) > 0 else None

    def to_pdb(self, output_file=None):
        """
        Dump the ensemble in PDB format.

        @param output_file: output file name or open stream. If not
                            specified, the PDB data is returned instead.
        @type output_file: str or stream

        @raise InvalidOperation: if the ensemble contains no models
        """
        from csb.bio.io.wwpdb import PDBEnsembleFileBuilder

        if self.models.length < 1:
            raise InvalidOperation("Can't dump an empty ensemble")

        buffer = csb.io.MemoryStream()

        builder = PDBEnsembleFileBuilder(buffer)
        builder.add_header(self.first_model)
        for model in self.models:
            builder.add_structure(model)
        builder.finalize()

        pdb_data = buffer.getvalue()
        buffer.close()

        if output_file:
            with csb.io.EntryWriter(output_file, close=False) as out:
                out.write(pdb_data)
        else:
            return pdb_data
371
class EnsembleModelsCollection(csb.core.CollectionContainer):
    """
    Collection of the L{Structure} models of an L{Ensemble}.
    The collection is initialized with C{start_index=1} and refuses models
    with an empty or already registered model ID.
    """

    def __init__(self):

        super(EnsembleModelsCollection, self).__init__(type=Structure, start_index=1)
        # model IDs appended so far; consulted by append() to detect duplicates
        self._models = set()

    def append(self, structure):
        """
        Add a new model

        @param structure: model to append
        @type structure: L{Structure}

        @return: whatever the base collection's C{append} returns
                 (presumably the index of the new item)

        @raise ValueError: if the model ID of C{structure} is empty
        @raise DuplicateModelIDError: if a model with the same ID has already
                                      been appended
        """
        model_id = structure.model_id

        if not model_id or not str(model_id).strip():
            raise ValueError("Invalid model identifier: '{0.model_id}'".format(structure))
        if model_id in self._models:
            raise DuplicateModelIDError(model_id)

        index = super(EnsembleModelsCollection, self).append(structure)
        # register the ID only after the base class has accepted the item,
        # so a rejected append does not poison the duplicate check
        self._models.add(model_id)

        return index

    @property
    def _exception(self):
        return EntityIndexError
397
class Structure(csb.core.AbstractNIContainer, AbstractEntity):
    """
    A single model of a PDB 3-Dimensional molecular structure, giving
    access to the L{Chain} objects it contains:

        >>> structure['A']
        <Chain A: Protein>
        >>> structure.chains['A']
        <Chain A: Protein>
        >>> structure.items
        <iterator of Chain-s>

    @param accession: accession number of the structure
    @type accession: str
    """

    def __init__(self, accession):

        self._accession = None
        self._chains = StructureChainsTable(self)
        self._model_id = None

        # goes through the property setter, hence after _chains is in place
        self.accession = accession

    def __repr__(self):
        template = "<Structure Model {0.model_id}: {0.accession}, {1} chains>"
        return template.format(self, self.chains.length)

    @property
    def _children(self):
        return self._chains

    @property
    def chains(self):
        """
        Table of chains, keyed by chain identifier
        @rtype: L{StructureChainsTable}
        """
        return self._chains

    @property
    def items(self):
        for chain_id in self._chains:
            yield self._chains[chain_id]

    @property
    def first_chain(self):
        """
        The first L{Chain} in the structure (if available)
        @rtype: L{Chain} or None
        """
        if len(self._chains) > 0:
            return next(self.items)

        return None

    @property
    def accession(self):
        """
        Accession number
        @rtype: str
        """
        return self._accession

    @accession.setter
    def accession(self, accession):
        if accession is None:
            raise ValueError(accession)

        # accessions are normalized: stripped and lower-cased
        self._accession = str(accession).strip().lower()

        # keep the accession of all member chains in sync
        for chain_id in self.chains:
            self.chains[chain_id]._accession = self._accession

    @property
    def model_id(self):
        """
        Model ID
        @rtype: int
        """
        return self._model_id

    @model_id.setter
    def model_id(self, value):
        self._model_id = value

    def to_fasta(self):
        """
        Dump the structure in FASTA format. Chains of zero length are
        left out.

        @return: FASTA-formatted string with all chains in the structure
        @rtype: str

        @deprecated: this method will be removed soon. Use
                     L{csb.bio.sequence.ChainSequence.create} instead
        """
        lines = []

        for chain in self.items:
            if chain.length > 0:
                lines.extend(('>{0}'.format(chain.header), chain.sequence))

        return os.linesep.join(lines)

    def to_pdb(self, output_file=None):
        """
        Dump the whole structure in PDB format.

        @param output_file: output file name or open stream. If not
                            specified, the PDB data is returned instead.
        @type output_file: str or stream
        """
        from csb.bio.io.wwpdb import PDBFileBuilder

        buffer = csb.io.MemoryStream()

        builder = PDBFileBuilder(buffer)
        builder.add_header(self)
        builder.add_structure(self)
        builder.finalize()

        pdb_data = buffer.getvalue()
        buffer.close()

        if output_file:
            with csb.io.EntryWriter(output_file, close=False) as out:
                out.write(pdb_data)
        else:
            return pdb_data
522
class StructureChainsTable(csb.core.DictionaryContainer):
    """
    Dictionary-like table of the L{Chain}s of a L{Structure}, keyed by
    chain ID.

    @param structure: the structure which owns this table (if any)
    @type structure: L{Structure}
    @param chains: initialization list of chains
    @type chains: iterable of L{Chain}
    """

    def __init__(self, structure=None, chains=None):
        self.__container = structure
        super(StructureChainsTable, self).__init__()

        if chains is not None:
            for chain in chains:
                self.append(chain)

    def __repr__(self):
        if len(self) > 0:
            return "<StructureChains: {0}>".format(', '.join(self))
        else:
            return "<StructureChains: empty>"

    @property
    def _exception(self):
        return ChainNotFoundError

    def append(self, chain):
        """
        Add a new Chain to the structure.

        @param chain: the new chain to be appended
        @type chain: L{Chain}

        @raise DuplicateChainIDError: if a chain with same ID is already defined
        @raise InvalidOperation: if the chain is already associated with a structure
        """

        if chain._structure and chain._structure is not self.__container:
            raise InvalidOperation('This chain is already part of another structure')
        if chain.id in self:
            raise DuplicateChainIDError('A chain with ID {0} is already defined'.format(chain.id))

        super(StructureChainsTable, self).append(chain.id, chain)

        if self.__container:
            # the chain inherits the accession of its new parent structure
            chain._accession = self.__container.accession
            chain._structure = self.__container

    def remove(self, id):
        """
        Remove a chain from the structure.

        @param id: ID of the chain to be detached
        @type id: str
        """
        chain = self[id]
        self._remove(id)
        chain._structure = None

    def _update_chain_id(self, chain, new_id):
        """
        Re-register C{chain} under C{new_id}. This updates the table only;
        the chain's own ID attribute is left to the caller.

        @param chain: a chain which is already part of this table
        @type chain: L{Chain}
        @param new_id: the new chain ID
        @type new_id: str

        @raise InvalidOperation: if C{chain} is not registered in this table
        @raise DuplicateChainIDError: if C{new_id} is taken by another chain
        """
        if chain.id not in self or self[chain.id] is not chain:
            raise InvalidOperation(chain)

        # Validate before detaching: a rejected rename must leave the chain
        # registered under its old ID. Renaming a chain to its current ID
        # is not a conflict.
        if new_id != chain.id and new_id in self:
            raise DuplicateChainIDError('Chain ID {0} is already defined'.format(new_id))

        self._remove(chain.id)
        super(StructureChainsTable, self).append(new_id, chain)
587
class Chain(csb.core.AbstractNIContainer, AbstractEntity):
    """
    Represents a polymeric chain. Provides list-like and rank-based access to
    the residues in the chain:

        >>> chain[0]
        <ProteinResidue [1]: SER None>
        >>> chain.residues[1]
        <ProteinResidue [1]: SER None>

    You can also access residues by their PDB sequence number:

        >>> chain.find(sequence_number=5, insertion_code='A')
        <ProteinResidue [1]: SER 5A>

    @param chain_id: ID of the new chain
    @type chain_id: str
    @param type: sequence type (a member of the L{SequenceTypes} enum)
    @type type: L{csb.core.EnumItem}
    @param name: name of the chain
    @type name: str
    @param residues: initialization list of L{Residue}-s
    @type residues: list
    @param accession: accession number of the chain
    @type accession: str
    @param molecule_id: MOL ID of the chain, if part of a polymer
    @type molecule_id: int
    """

    def __init__(self, chain_id, type=SequenceTypes.Protein, name='',
                 residues=None, accession=None, molecule_id=None):

        self._id = str(chain_id).strip()
        self._accession = None
        self._type = None
        self._residues = ChainResiduesCollection(self, residues)
        self._secondary_structure = None
        self._molecule_id = molecule_id
        self._torsion_computed = False
        self._name = str(name).strip()

        # parent structure; managed by StructureChainsTable
        self._structure = None

        # both go through the validating property setters
        self.type = type
        if accession is not None:
            self.accession = accession

    @staticmethod
    def from_sequence(sequence, id="_"):
        """
        Create a new chain from an existing sequence. Each residue gets the
        rank of the corresponding sequence residue as its sequence number.

        @param sequence: source sequence
        @type sequence: L{csb.bio.sequence.AbstractSequence}
        @param id: ID of the new chain
        @type id: str

        @rtype: L{Chain}
        """

        chain = Chain(id, type=sequence.type)

        for ri in sequence.residues:
            residue = Residue.create(sequence.type, ri.rank, ri.type, sequence_number=ri.rank)
            chain.residues.append(residue)

        return chain

    @property
    def _children(self):
        return self._residues

    def __repr__(self):
        return "<Chain {0.id}: {0.type!r}>".format(self)

    def __len__(self):
        return self._residues.length

    @property
    def id(self):
        """
        Chain's ID
        @rtype: str
        """
        return self._id

    @id.setter
    def id(self, id):
        if not isinstance(id, csb.core.string):
            raise ValueError(id)
        id = id.strip()
        if self._structure:
            # keep the chains table of the parent structure consistent
            self._structure.chains._update_chain_id(self, id)
        self._id = id

    @property
    def accession(self):
        """
        Accession number
        @rtype: str
        """
        return self._accession

    @accession.setter
    def accession(self, accession):
        if self._structure:
            raise InvalidOperation("Only the accession of the parent structure can be altered")
        if accession is None:
            raise ValueError(accession)
        self._accession = str(accession).strip()

    @property
    def type(self):
        """
        Chain type - any member of L{SequenceTypes}
        @rtype: enum item
        """
        return self._type

    @type.setter
    def type(self, type):
        if type.enum is not SequenceTypes:
            raise TypeError(type)
        self._type = type

    @property
    def residues(self):
        """
        Rank-based access to Chain's L{Residue}s
        @rtype: L{ChainResiduesCollection}
        """
        return self._residues

    @property
    def items(self):
        return iter(self._residues)

    @property
    def torsion(self):
        """
        Torsion angles; residues without torsion are represented by an
        empty L{TorsionAngles} instance.
        @rtype: L{TorsionAnglesCollection}

        @raise InvalidOperation: if L{Chain.compute_torsion} has not been
                                 invoked since the residues last changed
        """
        if not self._torsion_computed:
            raise InvalidOperation('The correctness of the data is not guaranteed '
                                   'until chain.compute_torsion() is invoked.')

        torsion = TorsionAnglesCollection()

        for r in self.residues:
            if r.torsion is None:
                torsion.append(TorsionAngles(None, None, None))
            else:
                torsion.append(r.torsion)

        return torsion

    @property
    def has_torsion(self):
        """
        True if C{Chain.compute_torsion} had been invoked
        @rtype: bool
        """
        return self._torsion_computed

    @property
    def length(self):
        """
        Number of residues
        @rtype: int
        """
        return self._residues.length

    @property
    def entry_id(self):
        """
        Accession number + chain ID, or None if any of them is missing
        @rtype: str
        """
        if self._accession and self._id:
            return self._accession + self._id
        else:
            return None

    @property
    def name(self):
        """
        Chain name
        @rtype: str
        """
        return self._name

    @name.setter
    def name(self, value):
        if value is not None:
            value = str(value).strip()
        self._name = value

    @property
    def molecule_id(self):
        """
        PDB MOL ID of this chain
        @rtype: int
        """
        return self._molecule_id

    @molecule_id.setter
    def molecule_id(self, value):
        self._molecule_id = value

    @property
    def header(self):
        """
        FASTA header in PDB format
        @rtype: str
        """
        header = "{0._accession}_{0._id} mol:{1} length:{0.length} {0.name}"
        return header.format(self, str(self.type).lower())

    @property
    def sequence(self):
        """
        Chain sequence; residues without a type are rendered as gaps
        @rtype: str
        """
        sequence = []
        gap = str(self.alphabet.GAP)

        for residue in self.residues:
            if residue and residue.type:
                sequence.append(str(residue.type))
            else:
                sequence.append(gap)

        return ''.join(sequence)

    @property
    def alphabet(self):
        """
        Sequence alphabet corresponding to the current chain type
        @rtype: L{csb.core.enum}
        """
        return SequenceAlphabets.get(self.type)

    @property
    def secondary_structure(self):
        """
        Secondary structure (if available)
        @rtype: L{SecondaryStructure}
        """
        return self._secondary_structure

    @secondary_structure.setter
    def secondary_structure(self, ss):
        if not isinstance(ss, SecondaryStructure):
            raise TypeError(ss)
        if len(ss) > 0:
            # the last element must not extend beyond the last residue
            if (ss[ss.last_index].end > self._residues.last_index):
                raise ValueError('Secondary structure out of range')
        self._secondary_structure = ss

    def clone(self):
        """
        Make a deep copy of the chain. If this chain is part of a structure,
        detach from it.

        @return: a deep copy of self
        @rtype: L{Chain}
        """
        start, end = self.residues.start_index, self.residues.last_index
        return self.subregion(start, end, clone=True)

    def subregion(self, start, end, clone=False):
        """
        Extract a subchain defined by [start, end]. If clone is True, this
        is a deep copy of the chain. Otherwise same as:

            >>> chain.residues[start : end + 1]

        but coordinates are checked and a Chain instance is returned.
        If this chain has a secondary structure, the corresponding
        sub-region of it is assigned to the new chain as well.

        @param start: start position of the sub-region
        @type start: int
        @param end: end position
        @type end: int
        @param clone: if True, a deep copy of the sub-region is returned,
                      otherwise - a shallow one
        @type clone: bool

        @return: a new chain, made from the residues of the extracted region
        @rtype: L{Chain}

        @raise IndexError: if start/end positions are out of range
        """
        if start < self.residues.start_index or start > self.residues.last_index:
            raise IndexError('The start position is out of range {0.start_index} .. {0.last_index}'.format(self.residues))
        if end < self.residues.start_index or end > self.residues.last_index:
            raise IndexError('The end position is out of range {0.start_index} .. {0.last_index}'.format(self.residues))

        residues = self.residues[start : end + 1]

        if clone:
            residues = [r.clone() for r in residues]

        chain = Chain(self.id, accession=self.accession, name=self.name,
                      type=self.type, residues=residues, molecule_id=self.molecule_id)

        # The source of the secondary structure is self: the freshly built
        # chain never has one, so testing the new chain here would always
        # skip the copy.
        if self.secondary_structure is not None:
            chain.secondary_structure = self.secondary_structure.subregion(start, end)
        chain._torsion_computed = self._torsion_computed

        return chain

    def find(self, sequence_number, insertion_code=None):
        """
        Get a residue by its original Residue Sequence Number and Insertion Code.

        @param sequence_number: PDB sequence number of the residue
        @type sequence_number: str
        @param insertion_code: PDB insertion code of the residue (if any)
        @type insertion_code: str

        @return: the residue object with such an ID
        @rtype: L{Residue}

        @raise csb.core.ItemNotFoundError: if no residue with that ID exists
        """
        # residue IDs are looked up as <sequence number><insertion code>
        res_id = str(sequence_number).strip()

        if insertion_code is not None:
            insertion_code = str(insertion_code).strip()
            res_id += insertion_code

        return self.residues._get_residue(res_id)

    def compute_torsion(self):
        """
        Iterate over all residues in the chain, compute and set their torsion property.

        @raise NotImplementedError: if this is not a protein chain
        @raise Missing3DStructureError: when a 3D structure is absent
        @raise Broken3DStructureError: when a given atom cannot be retrieved from any residue
        """
        if self.type != SequenceTypes.Protein:
            raise NotImplementedError()

        for i, residue in enumerate(self.residues, start=self.residues.start_index):

            # the first residue has no predecessor, the last one no successor
            prev_residue, next_residue = None, None

            if i > self.residues.start_index:
                prev_residue = self.residues[i - 1]
            if i < self.residues.last_index:
                next_residue = self.residues[i + 1]

            residue.torsion = residue.compute_torsion(prev_residue, next_residue, strict=False)

        self._torsion_computed = True

    def superimpose(self, other, what=['CA'], how=AlignmentTypes.Global):
        """
        Find the optimal fit between C{self} and C{other}. Return L{SuperimposeInfo}
        (RotationMatrix, translation Vector and RMSD), such that:

            >>> other.transform(rotation_matrix, translation_vector)

        will result in C{other}'s coordinates superimposed over C{self}.

        @param other: the subject (movable) chain
        @type other: L{Chain}
        @param what: a list of atom kinds, e.g. ['CA']
        @type what: list
        @param how: fitting method (global or local) - a member of the L{AlignmentTypes} enum
        @type how: L{csb.core.EnumItem}

        @return: superimposition info object, containing rotation matrix, translation
                 vector and computed RMSD
        @rtype: L{SuperimposeInfo}

        @raise AlignmentArgumentLengthError: when the lengths of the argument chains differ
        """
        if self.length != other.length or self.length < 1:
            raise AlignmentArgumentLengthError('Both chains must be of the same and positive length')

        x = self.get_coordinates(what)
        y = other.get_coordinates(what)
        assert len(x) == len(y)

        if how == AlignmentTypes.Global:
            r, t = csb.bio.utils.fit(x, y)
        else:
            r, t = csb.bio.utils.fit_wellordered(x, y)

        rmsd = csb.bio.utils.rmsd(x, y)

        return SuperimposeInfo(r, t, rmsd=rmsd)

    def align(self, other, what=['CA'], how=AlignmentTypes.Global):
        """
        Align C{other}'s atoms of the specified kinds over self in space and
        return L{SuperimposeInfo}. Coordinates of C{other} are overwritten in
        place using the rotation matrix and translation vector in
        L{SuperimposeInfo}. Alias for::

            result = self.superimpose(other, what=what, how=how)
            other.transform(result.rotation, result.translation)

        @param other: the subject (movable) chain
        @type other: L{Chain}
        @param what: a list of atom kinds, e.g. ['CA']
        @type what: list
        @param how: fitting method (global or local) - a member of the L{AlignmentTypes} enum
        @type how: L{csb.core.EnumItem}

        @return: superimposition info object, containing rotation matrix, translation
                 vector and computed RMSD
        @rtype: L{SuperimposeInfo}

        @raise AlignmentArgumentLengthError: when the lengths of the argument chains differ
        """
        result = self.superimpose(other, what=what, how=how)
        other.transform(result.rotation, result.translation)

        return result

    def rmsd(self, other, what=['CA']):
        """
        Compute the C-alpha RMSD against another chain (assuming equal length).
        Chains are superimposed with Least Squares Fit / Singular Value Decomposition.

        @param other: the subject (movable) chain
        @type other: L{Chain}
        @param what: a list of atom kinds, e.g. ['CA']
        @type what: list

        @return: computed RMSD over the specified atom kinds
        @rtype: float

        @raise ValueError: when the lengths of the argument chains differ
        """

        if self.length != other.length or self.length < 1:
            raise ValueError('Both chains must be of the same and positive length '
                             '(got {0} and {1})'.format(self.length, other.length))

        x = self.get_coordinates(what)
        y = other.get_coordinates(what)
        assert len(x) == len(y)

        return csb.bio.utils.rmsd(x, y)

    def tm_superimpose(self, other, what=['CA'], how=AlignmentTypes.Global):
        """
        Find the optimal fit between C{self} and C{other}. Return L{SuperimposeInfo}
        (RotationMatrix, translation Vector and TM-score), such that:

            >>> other.transform(rotation_matrix, translation_vector)

        will result in C{other}'s coordinates superimposed over C{self}.

        @param other: the subject (movable) chain
        @type other: L{Chain}
        @param what: a list of atom kinds, e.g. ['CA']
        @type what: list
        @param how: fitting method (global or local) - a member of the L{AlignmentTypes} enum
        @type how: L{csb.core.EnumItem}

        @return: superimposition info object, containing rotation matrix, translation
                 vector and computed TM-score
        @rtype: L{SuperimposeInfo}

        @raise AlignmentArgumentLengthError: when the lengths of the argument chains differ
        """

        if self.length != other.length or self.length < 1:
            # AlignmentArgumentLengthError is a ValueError, so callers which
            # catch ValueError are not affected; this matches the documented
            # contract and L{Chain.superimpose}
            raise AlignmentArgumentLengthError('Both chains must be of the same and positive length')

        x = self.get_coordinates(what)
        y = other.get_coordinates(what)
        assert len(x) == len(y)

        L_ini_min = 0
        if how == AlignmentTypes.Global:
            fit = csb.bio.utils.fit
        elif how == AlignmentTypes.Local:
            fit = csb.bio.utils.fit_wellordered
        else:
            # TMscore.f like search (slow)
            fit = csb.bio.utils.fit
            L_ini_min = 4

        r, t, tm = csb.bio.utils.tm_superimpose(x, y, fit, None, None, L_ini_min)

        return SuperimposeInfo(r, t, tm_score=tm)

    def tm_score(self, other, what=['CA']):
        """
        Compute the C-alpha TM-Score against another chain (assuming equal chain length
        and optimal configuration - no fitting is done).

        @param other: the subject (movable) chain
        @type other: L{Chain}
        @param what: a list of atom kinds, e.g. ['CA']
        @type what: list

        @return: computed TM-Score over the specified atom kinds
        @rtype: float

        @raise ValueError: when the lengths of the argument chains differ
        """

        if self.length != other.length or self.length < 1:
            raise ValueError('Both chains must be of the same and positive length')

        x = self.get_coordinates(what)
        y = other.get_coordinates(what)
        assert len(x) == len(y)

        return csb.bio.utils.tm_score(x, y)
1090
class ChainResiduesCollection(csb.core.CollectionContainer):
    """
    Collection of the L{Residue}s of a L{Chain}, initialized with
    C{start_index=1}. Also maintains a lookup table of the residues
    by their ID.

    @param chain: the chain which owns this collection
    @type chain: L{Chain}
    @param residues: initialization list of residues (or None)
    @type residues: iterable of L{Residue}
    """

    def __init__(self, chain, residues):
        super(ChainResiduesCollection, self).__init__(type=Residue, start_index=1)

        self.__container = chain
        self.__lookup = {}

        if residues is not None:
            for item in residues:
                self.append(item)

    def __repr__(self):
        if len(self) == 0:
            return "<ChainResidues: empty>"

        first, last = self[self.start_index], self[self.last_index]
        return "<ChainResidues: {0} ... {1}>".format(first, last)

    @property
    def _exception(self):
        return EntityIndexError

    def append(self, residue):
        """
        Append a new residue to the chain. This resets the torsion
        status of the owning chain.

        @param residue: the new residue
        @type residue: L{Residue}

        @return: whatever the base collection's C{append} returns
                 (presumably the index of the new residue)

        @raise DuplicateResidueIDError: if a residue with the same ID already exists
        """
        if residue.id and residue.id in self.__lookup:
            raise DuplicateResidueIDError('A residue with ID {0} is already defined within the chain'.format(residue.id))

        position = super(ChainResiduesCollection, self).append(residue)

        residue._container = self
        # torsion data of the chain is stale once its residues change
        self.__container._torsion_computed = False
        self._add(residue)

        return position

    def _contains(self, id):
        return id in self.__lookup

    def _remove(self, id):
        # dropping an unknown ID is a no-op
        self.__lookup.pop(id, None)

    def _add(self, residue):
        self.__lookup[residue.id] = residue

    def _get_residue(self, id):
        try:
            return self.__lookup[id]
        except KeyError:
            raise csb.core.ItemNotFoundError(id)
1144
class Residue(csb.core.AbstractNIContainer, AbstractEntity):
    """
    Base class representing a single residue. Provides a dictionary-like
    access to the atoms contained in the residue:

        >>> residue['CA']
        <Atom [3048]: CA>
        >>> residue.atoms['CA']
        <Atom [3048]: CA>
        >>> residue.items
        <iterator of Atom-s>

    @param rank: rank of the residue with respect to the chain
    @type rank: int
    @param type: residue type - a member of any L{SequenceAlphabets}
    @type type: L{csb.core.EnumItem}
    @param sequence_number: PDB sequence number of the residue
                            (converted with C{int()} when not None)
    @type sequence_number: int or str
    @param insertion_code: PDB insertion code, if any
    @type insertion_code: str
    """

    def __init__(self, rank, type, sequence_number=None, insertion_code=None):

        self._type = None
        self._pdb_name = None
        self._rank = int(rank)
        self._atoms = ResidueAtomsTable(self)
        self._secondary_structure = None
        self._torsion = None
        self._sequence_number = None
        self._insertion_code = None
        self._container = None

        # pass type- and value-checking control to the setters
        self.type = type
        self.id = sequence_number, insertion_code
        self._pdb_name = repr(type)

    @property
    def _children(self):
        return self._atoms

    def __repr__(self):
        return '<{1} [{0.rank}]: {0.type!r} {0.id}>'.format(self, self.__class__.__name__)

    @property
    def type(self):
        """
        Residue type - a member of any sequence alphabet
        @rtype: enum item
        """
        return self._type

    @type.setter
    def type(self, type):
        # only members of the protein, nucleic and unknown alphabets are accepted
        if type.enum not in (SequenceAlphabets.Protein, SequenceAlphabets.Nucleic, SequenceAlphabets.Unknown):
            raise TypeError(type)
        self._type = type

    @property
    def rank(self):
        """
        Residue's position in the sequence (1-based)
        @rtype: int
        """
        return self._rank

    @property
    def secondary_structure(self):
        """
        Secondary structure element this residue is part of
        @rtype: L{SecondaryStructureElement}
        """
        return self._secondary_structure

    @secondary_structure.setter
    def secondary_structure(self, structure):
        if not isinstance(structure, SecondaryStructureElement):
            raise TypeError(structure)
        self._secondary_structure = structure

    @property
    def torsion(self):
        """
        Torsion angles
        @rtype: L{TorsionAngles}
        """
        return self._torsion

    @torsion.setter
    def torsion(self, torsion):
        if not isinstance(torsion, TorsionAngles):
            raise TypeError(torsion)
        self._torsion = torsion

    @property
    def atoms(self):
        """
        Access residue's atoms by atom name
        @rtype: L{ResidueAtomsTable}
        """
        return self._atoms

    @property
    def items(self):
        """
        Iterator over the atoms of this residue
        @rtype: iterator of L{Atom}
        """
        for atom in self._atoms:
            yield self._atoms[atom]

    @property
    def sequence_number(self):
        """
        PDB sequence number (if residue.has_structure is True)
        @rtype: int
        """
        return self._sequence_number

    @property
    def insertion_code(self):
        """
        PDB insertion code (if defined)
        @rtype: str
        """
        return self._insertion_code

    @property
    def id(self):
        """
        PDB sequence number [+ insertion code]; None if the residue has
        no sequence number. Assign a C{(sequence_number, insertion_code)}
        tuple to change it.
        @rtype: str
        """
        if self._sequence_number is None:
            return None
        elif self._insertion_code is not None:
            return str(self._sequence_number) + self._insertion_code
        else:
            return str(self._sequence_number)

    @id.setter
    def id(self, value):
        sequence_number, insertion_code = value
        old_id = self.id
        id = ''
        if sequence_number is not None:
            sequence_number = int(sequence_number)
            id = str(sequence_number)
        if insertion_code is not None:
            insertion_code = str(insertion_code).strip()
            id += insertion_code
            if sequence_number is None:
                raise InvalidOperation('sequence_number must be defined when an insertion_code is specified.')
        # NOTE(review): the nesting below is reconstructed from a flattened
        # listing - confirm against the original file
        if old_id != id:
            if self._container:
                # keep the owning collection's ID lookup in sync
                if self._container._contains(id):
                    raise DuplicateResidueIDError('A residue with ID {0} is already defined within the chain'.format(id))
                self._container._remove(old_id)
            self._sequence_number = sequence_number
            self._insertion_code = insertion_code
            if self._container:
                self._container._add(self)

    @property
    def has_structure(self):
        """
        True if this residue has any atoms
        @rtype: bool
        """
        return len(self.atoms) > 0

    def get_coordinates(self, what=None, skip=False):
        """
        Collect the coordinate vectors of the requested atoms.

        @param what: atom names to retrieve; if empty or None, all atoms of
                     the residue are used
        @type what: list
        @param skip: if True, missing atoms (or a missing structure) are
                     silently skipped instead of raising
        @type skip: bool

        @return: array with one coordinate vector per retrieved atom
        @rtype: numpy.array

        @raise Missing3DStructureError: if the residue has no atoms and
                                        C{skip} is False
        @raise Broken3DStructureError: if a requested atom is missing and
                                       C{skip} is False
        """
        coords = []

        if not self.has_structure:
            if skip:
                return numpy.array([])
            raise Missing3DStructureError(self)

        for atom_kind in (what or self.atoms):
            if atom_kind in self.atoms:
                coords.append(self.atoms[atom_kind].vector)
            else:
                if skip:
                    continue
                raise Broken3DStructureError('Could not retrieve {0} atom'.format(atom_kind))

        return numpy.array(coords)

    def clone(self):
        """
        Make a deep copy of the residue, detached from its container.

        @return: a deep copy of this residue with no container
        @rtype: L{Residue}
        """
        container = self._container
        # detach temporarily, so that the owning collection is not copied too
        self._container = None
        try:
            return copy.deepcopy(self)
        finally:
            # always re-attach, even if the copy fails
            self._container = container

    @staticmethod
    def create(sequence_type, *a, **k):
        """
        Residue factory method, which returns the proper L{Residue} instance based on
        the specified C{sequence_type}. All additional arguments are used to initialize
        the subclass by passing them automatically to the underlying constructor.

        @param sequence_type: create a Residue of that SequenceType
        @type sequence_type: L{csb.core.EnumItem}

        @return: a new residue of the proper subclass
        @rtype: L{Residue} subclass

        @raise ValueError: if the sequence type is not known
        """
        if sequence_type == SequenceTypes.Protein:
            return ProteinResidue(*a, **k)
        elif sequence_type == SequenceTypes.NucleicAcid:
            return NucleicResidue(*a, **k)
        elif sequence_type == SequenceTypes.Unknown:
            return UnknownResidue(*a, **k)
        else:
            raise ValueError(sequence_type)
1359
class ProteinResidue(Residue):
    """
    Represents a single amino acid residue.

    @param rank: rank of the residue with respect to the chain
    @type rank: int
    @param type: residue type - a member of
                 L{csb.bio.sequence.SequenceAlphabets.Protein}, or a string
                 to be parsed as such (a 3-character string is parsed as a
                 member name, anything else as a member value)
    @type type: L{csb.core.EnumItem}
    @param sequence_number: PDB sequence number of the residue
    @type sequence_number: str
    @param insertion_code: PDB insertion code, if any
    @type insertion_code: str
    """

    def __init__(self, rank, type, sequence_number=None, insertion_code=None):

        if isinstance(type, csb.core.string):
            try:
                parser = csb.core.Enum.parsename if len(type) == 3 else csb.core.Enum.parse
                type = parser(SequenceAlphabets.Protein, type)
            except (csb.core.EnumMemberError, csb.core.EnumValueError):
                raise ValueError("'{0}' is not a valid amino acid".format(type))
        elif type.enum is not SequenceAlphabets.Protein:
            raise TypeError(type)

        super(ProteinResidue, self).__init__(rank, type, sequence_number, insertion_code)

    def compute_torsion(self, prev_residue, next_residue, strict=True):
        """
        Compute the torsion angles of the current residue with neighboring residues
        C{prev_residue} and C{next_residue}.

        @param prev_residue: the previous residue in the chain
        @type prev_residue: L{Residue}
        @param next_residue: the next residue in the chain
        @type next_residue: L{Residue}
        @param strict: if True, an error is raised when this residue or a
                       given neighbor has missing or broken structure, else
                       the problem is silently ignored and the affected
                       angles are left undefined
        @type strict: bool

        @return: a L{TorsionAngles} object, holding the phi, psi and omega values
        @rtype: L{TorsionAngles}

        @raise ValueError: if both neighbors are None
        @raise Missing3DStructureError: in strict mode, when a residue has no atoms
        @raise Broken3DStructureError: in strict mode, when a specific atom cannot be found
        """
        if prev_residue is None and next_residue is None:
            raise ValueError('At least one neighboring residue is required to compute the torsion.')

        torsion = TorsionAngles(None, None, None, units=AngleUnits.Degrees)

        for candidate in (self, prev_residue, next_residue):
            if candidate is None or candidate.has_structure:
                continue
            if strict:
                raise Missing3DStructureError(repr(candidate))
            elif candidate is self:
                # nothing can be computed without own atoms
                return torsion

        # backbone atoms of the current residue
        try:
            n = self._atoms['N'].vector
            ca = self._atoms['CA'].vector
            c = self._atoms['C'].vector
        except csb.core.ItemNotFoundError as missing_atom:
            if not strict:
                return torsion
            raise Broken3DStructureError('Could not retrieve {0} atom from the current residue {1!r}.'.format(
                                                                                        missing_atom, self))

        # phi needs the C atom of residue i-1
        try:
            if prev_residue is not None and prev_residue.has_structure:
                prev_c = prev_residue._atoms['C'].vector
                torsion.phi = csb.numeric.dihedral_angle(prev_c, n, ca, c)
        except csb.core.ItemNotFoundError as missing_prevatom:
            if strict:
                raise Broken3DStructureError('Could not retrieve {0} atom from the i-1 residue {1!r}.'.format(
                                                                                missing_prevatom, prev_residue))

        # psi needs N of residue i+1; omega additionally needs its CA
        try:
            if next_residue is not None and next_residue.has_structure:
                next_n = next_residue._atoms['N'].vector
                torsion.psi = csb.numeric.dihedral_angle(n, ca, c, next_n)
                next_ca = next_residue._atoms['CA'].vector
                torsion.omega = csb.numeric.dihedral_angle(ca, c, next_n, next_ca)
        except csb.core.ItemNotFoundError as missing_nextatom:
            if strict:
                raise Broken3DStructureError('Could not retrieve {0} atom from the i+1 residue {1!r}.'.format(
                                                                                missing_nextatom, next_residue))

        return torsion
1452
class NucleicResidue(Residue):
    """
    Represents a single nucleotide residue.

    @param rank: rank of the residue with respect to the chain
    @type rank: int
    @param type: residue type - a member of
                 L{csb.bio.sequence.SequenceAlphabets.Nucleic}, or a string
                 to be parsed as such (a string longer than one character is
                 parsed as a member name, otherwise as a member value)
    @type type: L{csb.core.EnumItem}
    @param sequence_number: PDB sequence number of the residue
    @type sequence_number: str
    @param insertion_code: PDB insertion code, if any
    @type insertion_code: str
    """

    def __init__(self, rank, type, sequence_number=None, insertion_code=None):

        if isinstance(type, csb.core.string):
            try:
                parser = csb.core.Enum.parsename if len(type) > 1 else csb.core.Enum.parse
                type = parser(SequenceAlphabets.Nucleic, type)
            except (csb.core.EnumMemberError, csb.core.EnumValueError):
                raise ValueError("'{0}' is not a valid nucleotide".format(type))
        elif type.enum is not SequenceAlphabets.Nucleic:
            raise TypeError(type)

        super(NucleicResidue, self).__init__(rank, type, sequence_number, insertion_code)
        # overrides the repr()-based name assigned by the base constructor
        self._pdb_name = str(type)
1483
1484 -class UnknownResidue(Residue):
1485
1486 - def __init__(self, rank, type, sequence_number=None, insertion_code=None):
1490
class ResidueAtomsTable(csb.core.DictionaryContainer):
    """
    Represents a collection of atoms. Provides dictionary-like access,
    where PDB atom names are used for lookup.

    @param residue: the residue that owns this table
    @type residue: L{Residue}
    @param atoms: initial atoms to append (optional)
    @type atoms: iterable of L{Atom}
    """

    def __init__(self, residue, atoms=None):

        self.__residue = residue
        super(ResidueAtomsTable, self).__init__()

        if atoms is None:
            return
        for item in atoms:
            self.append(item)

    def __repr__(self):
        if len(self) == 0:
            return "<ResidueAtoms: empty>"
        names = ', '.join(self.keys())
        return "<ResidueAtoms: {0}>".format(names)

    @property
    def _exception(self):
        return AtomNotFoundError

    def append(self, atom):
        """
        Append a new Atom to the catalog.

        If the atom has an alternate position, a L{DisorderedAtom} proxy is
        stored under its name and the atom becomes one of the proxy's
        children. If a disordered atom with that name already exists, the
        atom is only appended to its children. If a regular atom with the
        same name exists, but was not marked as disordered, it is wrapped
        in a L{DisorderedAtom} first and flagged as alternate.

        @param atom: the new atom to append
        @type atom: L{Atom}

        @raise InvalidOperation: if the atom already belongs to another residue
        @raise DuplicateAtomIDError: if a regular (not disordered) atom with
                                     the same name already exists in that residue
        """
        owner = self.__residue
        if atom.residue and atom.residue is not owner:
            raise InvalidOperation('This atom is part of another residue')

        name = atom.name
        disordered = atom.alternate or (name in self and isinstance(self[name], DisorderedAtom))

        if not disordered:
            # a regular atom: its name must be unique within the residue
            if name in self:
                raise DuplicateAtomIDError('Atom {0} is already defined for {1}'.format(
                                                                        name, owner))
            super(ResidueAtomsTable, self).append(name, atom)
            atom._residue = owner
            return

        if name not in self:
            # first alternative seen: store a proxy instead of the atom itself
            atom._residue = owner
            proxy = DisorderedAtom(atom)
            super(ResidueAtomsTable, self).append(proxy.name, proxy)
            return

        if not isinstance(self[name], DisorderedAtom):
            # the existing atom was not marked as disordered: fix that
            misflagged = self[name]
            assert misflagged.alternate in (None, False)
            misflagged.alternate = True
            self.update(name, DisorderedAtom(misflagged))

        if not atom.alternate:
            atom.alternate = True
        atom._residue = owner
        self[name].append(atom)

    def update(self, atom_name, atom):
        """
        Update the atom with the specified name.

        @param atom_name: update key
        @type atom_name: str
        @param atom: new value for this key
        @type atom: L{Atom}

        @raise ValueError: if C{atom} has a different name than C{atom_name}
        """
        if atom.name != atom_name:
            raise ValueError("Atom's name differs from the specified key.")

        owner = self.__residue
        if atom.residue is not owner:
            atom._residue = owner

        super(ResidueAtomsTable, self)._update({atom_name: atom})
1573
class Atom(AbstractEntity):
    """
    Represents a single atom in space.

    @param serial_number: atom's UID
    @type serial_number: int
    @param name: atom's name
    @type name: str
    @param element: corresponding L{ChemElements} member, its name as a
                    string, or None
    @type element: L{csb.core.EnumItem}
    @param vector: atom's coordinates
    @type vector: numpy array
    @param alternate: if True, means that this is a wobbling atom with multiple alternative
                      locations
    @type alternate: bool

    @raise TypeError: if C{name} is not a string, C{element} is not a member
                      of L{ChemElements}, or C{serial_number} is not a
                      positive int
    @raise ValueError: if C{name} is blank or C{vector} is not three-dimensional
    """

    def __init__(self, serial_number, name, element, vector, alternate=False):

        self._serial_number = None
        self._name = None
        self._element = None
        self._residue = None
        self._vector = None
        self._alternate = False
        self._bfactor = None
        self._occupancy = None
        self._charge = None

        if not isinstance(name, csb.core.string):
            raise TypeError(name)
        name_compact = name.strip()
        if len(name_compact) < 1:
            raise ValueError(name)
        self._name = name_compact
        # the name exactly as given, including any surrounding whitespace
        self._full_name = name

        if isinstance(element, csb.core.string):
            element = csb.core.Enum.parsename(ChemElements, element)
        elif element is None:
            pass
        elif element.enum is not ChemElements:
            raise TypeError(element)
        self._element = element

        # pass type- and value-checking control to setters
        self.serial_number = serial_number
        self.vector = vector
        self.alternate = alternate

    def __repr__(self):
        return "<Atom [{0.serial_number}]: {0.name}>".format(self)

    def __lt__(self, other):
        # atoms are ordered by their serial number
        return self.serial_number < other.serial_number

    def transform(self, rotation, translation):
        """
        Apply the given rotation and translation to the atom's coordinates,
        in place.

        @param rotation: rotation matrix
        @param translation: translation vector
        """
        vector = numpy.dot(self.vector, numpy.transpose(rotation)) + translation
        self.vector = vector

    def get_coordinates(self, what=None, skip=False):
        """
        Get a copy of the atom's coordinates, if the atom is among the
        requested atom kinds.

        @param what: atom names of interest; defaults to this atom's name
        @type what: list
        @param skip: if True, return an empty array instead of raising when
                     this atom is not in C{what}
        @type skip: bool

        @return: array holding a single coordinate vector (or empty)
        @rtype: numpy.array

        @raise Missing3DStructureError: if the atom is not in C{what} and
                                        C{skip} is False
        """
        if what is None:
            what = [self.name]

        if self.name in what:
            return numpy.array([self.vector.copy()])
        elif skip:
            return numpy.array([])
        else:
            raise Missing3DStructureError()

    def clone(self):
        """
        Make a deep copy of the atom, detached from its residue.

        @return: a deep copy of this atom with no residue
        @rtype: L{Atom}
        """
        residue = self._residue
        # detach temporarily, so that the owning residue is not copied too
        self._residue = None
        try:
            return copy.deepcopy(self)
        finally:
            # always re-attach, even if the copy fails
            self._residue = residue

    @property
    def serial_number(self):
        """
        PDB serial number
        @rtype: int
        """
        return self._serial_number

    @serial_number.setter
    def serial_number(self, number):
        if not isinstance(number, int) or number < 1:
            raise TypeError(number)
        self._serial_number = number

    @property
    def name(self):
        """
        PDB atom name (trimmed)
        @rtype: str
        """
        return self._name

    @property
    def element(self):
        """
        Chemical element - a member of L{ChemElements}
        @rtype: enum item
        """
        return self._element

    @property
    def residue(self):
        """
        Residue instance that owns this atom (if available)
        @rtype: L{Residue}
        """
        return self._residue

    @residue.setter
    def residue(self, residue):
        if self._residue:
            raise InvalidOperation('This atom is already part of a residue.')
        if not isinstance(residue, Residue):
            raise TypeError(residue)
        self._residue = residue

    @property
    def vector(self):
        """
        Atom's 3D coordinates (x, y, z)
        @rtype: numpy.array
        """
        return self._vector

    @vector.setter
    def vector(self, vector):
        if numpy.shape(vector) != (3,):
            raise ValueError("Three dimensional vector expected")
        self._vector = numpy.array(vector)

    @property
    def alternate(self):
        """
        Alternative location flag (stored as given, without validation)
        @rtype: str
        """
        return self._alternate

    @alternate.setter
    def alternate(self, value):
        self._alternate = value

    @property
    def bfactor(self):
        """
        Temperature factor
        @rtype: float
        """
        return self._bfactor

    @bfactor.setter
    def bfactor(self, value):
        self._bfactor = value

    @property
    def occupancy(self):
        """
        Occupancy number
        @rtype: float
        """
        return self._occupancy

    @occupancy.setter
    def occupancy(self, value):
        self._occupancy = value

    @property
    def charge(self):
        """
        Charge
        @rtype: int
        """
        return self._charge

    @charge.setter
    def charge(self, value):
        self._charge = value

    @property
    def items(self):
        """
        Always an empty iterator: atoms are leaf nodes with no children
        @rtype: iterator
        """
        return iter([])
1759
class DisorderedAtom(csb.core.CollectionContainer, Atom):
    """
    A wobbling atom, which has alternative locations. Each alternative is represented
    as a 'normal' L{Atom}. The atom with a highest occupancy is selected as a representative,
    hence a DisorderedAtom behaves as a regular L{Atom} (proxy of the representative) as well
    as a collection of Atoms.

    @param atom: the first atom to be appended to the collection of alternatives. It
                 is automatically defined as a representative, until a new atom with
                 higher occupancy is appended to the collection
    @type atom: L{Atom}
    """

    def __init__(self, atom):

        super(DisorderedAtom, self).__init__(type=Atom)

        self.__rep = None       # current representative atom
        self.__alt = {}         # altloc identifier -> atom

        self.append(atom)

    def __getattr__(self, name):
        # anything this object does not define itself is delegated to
        # the representative atom
        try:
            return object.__getattribute__(self, name)
        except AttributeError:
            subject = object.__getattribute__(self, '_DisorderedAtom__rep')
            return getattr(subject, name)

    def append(self, atom):
        """
        Append a new atom to the collection of alternatives.

        @param atom: the new alternative
        @type atom: L{Atom}
        """
        self.__update_rep(atom)
        self.__alt[atom.alternate] = atom

        super(DisorderedAtom, self).append(atom)

    def find(self, altloc):
        """
        Retrieve a specific atom by its altloc identifier.

        @param altloc: alternative location identifier
        @type altloc: str

        @return: the alternative registered under C{altloc}
        @rtype: L{Atom}

        @raise EntityNotFoundError: if there is no such alternative
        """
        if altloc in self.__alt:
            return self.__alt[altloc]
        else:
            # fall back to a linear search, in case an atom's altloc flag
            # has changed after it was appended
            for atom in self:
                if atom.alternate == altloc:
                    return atom

        raise EntityNotFoundError(altloc)

    def transform(self, rotation, translation):
        """
        Apply the given rotation and translation to every alternative.
        """
        for atom in self:
            atom.transform(rotation, translation)

    def __update_rep(self, atom):
        # The new atom becomes the representative if there is none yet, or
        # if its occupancy is strictly higher. The inequality is tested
        # first, so equal occupancies are never order-compared.
        if self.__rep is None or \
            ((self.__rep.occupancy != atom.occupancy) and (self.__rep.occupancy < atom.occupancy)):

            self.__rep = atom

    def __repr__(self):
        return "<DisorderedAtom: {0.length} alternative locations>".format(self)
1833
class SuperimposeInfo(object):
    """
    Plain holder for the outcome of a structural superimposition.

    @param rotation: rotation matrix
    @type rotation: Numpy Array
    @param translation: translation vector
    @type translation: L{Vector}
    @param rmsd: RMSD of the fit, if available
    @type rmsd: float
    @param tm_score: TM-score of the fit, if available
    @type tm_score: float
    """

    def __init__(self, rotation, translation, rmsd=None, tm_score=None):

        # the transformation itself
        self.rotation, self.translation = rotation, translation
        # optional quality measures
        self.rmsd, self.tm_score = rmsd, tm_score
1848
class SecondaryStructureElement(object):
    """
    Describes a Secondary Structure Element.

    @param start: start position with reference to the chain (1-based)
    @type start: int
    @param end: end position with reference to the chain (1-based, inclusive)
    @type end: int
    @param type: element type - a member of the L{SecStructures} enum
                 (or a string to be parsed as such)
    @type type: csb.core.EnumItem
    @param score: secondary structure prediction confidence, if available:
                  one value per residue in the element
    @type score: sequence of int

    @raise IndexError: if start/end positions are out of range
    """

    def __init__(self, start, end, type, score=None):

        if not (0 < start <= end):
            raise IndexError('Element coordinates are out of range: 1 <= start <= end.')

        self._start = None
        self._end = None
        self._type = None
        self._score = None

        # pass type- and value-checking control to setters
        self.start = start
        self.end = end
        self.type = type

        if score is not None:
            self.score = score

    def __lt__(self, other):
        # elements are ordered by their start position
        return self.start < other.start

    def __eq__(self, other):
        return (self.type == other.type
                and self.start == other.start
                and self.end == other.end)

    def __str__(self):
        return self.to_string()

    def __repr__(self):
        return "<{0.type!r}: {0.start}-{0.end}>".format(self)

    @property
    def start(self):
        """
        Start position (1-based)
        @rtype: int
        """
        return self._start

    @start.setter
    def start(self, value):
        if value is not None:
            value = int(value)
            if value < 1:
                raise ValueError(value)
        self._start = value

    @property
    def end(self):
        """
        End position (1-based)
        @rtype: int
        """
        return self._end

    @end.setter
    def end(self, value):
        if value is not None:
            value = int(value)
            if value < 1:
                raise ValueError(value)
        self._end = value

    @property
    def type(self):
        """
        Secondary structure type - a member of L{SecStructures}
        @rtype: enum item
        """
        return self._type

    @type.setter
    def type(self, value):
        if isinstance(value, csb.core.string):
            value = csb.core.Enum.parse(SecStructures, value)
        if value.enum is not SecStructures:
            raise TypeError(value)
        self._type = value

    @property
    def length(self):
        """
        Number of residues covered by this element
        @rtype: int
        """
        return self.end - self.start + 1

    @property
    def score(self):
        """
        Secondary structure confidence values for each residue
        @rtype: L{CollectionContainer}
        """
        return self._score

    @score.setter
    def score(self, scores):
        if not len(scores) == self.length:
            raise ValueError('There must be a score entry for each residue in the element.')
        # the container is indexed by position, starting at the element's start
        self._score = csb.core.CollectionContainer(
                        items=list(scores), type=int, start_index=self.start)

    def overlaps(self, other):
        """
        Return True if C{self} overlaps with C{other}, i.e. if the two
        elements have at least one position in common.

        @type other: L{SecondaryStructureElement}
        @rtype: bool
        """
        # the common region of two inclusive intervals runs from the larger
        # start to the smaller end; it is non-empty iff start <= end
        return max(self.start, other.start) <= min(self.end, other.end)

    def merge(self, other):
        """
        Merge C{self} and C{other}.

        @type other: L{SecondaryStructureElement}

        @return: a new secondary structure element
        @rtype: L{SecondaryStructureElement}

        @raise ValueError: if the elements do not overlap or differ in type

        @bug: confidence scores are lost
        """
        if not self.overlaps(other):
            raise ValueError("Can't merge non-overlapping secondary structures")
        elif self.type != other.type:
            raise ValueError("Can't merge secondary structures of different type")

        start = min(self.start, other.start)
        end = max(self.end, other.end)
        assert self.type == other.type

        return SecondaryStructureElement(start, end, self.type)

    def to_string(self):
        """
        Dump the element as a string: the string form of its type,
        repeated once for each covered position.

        @return: string representation of the element
        @rtype: str
        """
        return str(self.type) * self.length

    def simplify(self):
        """
        Convert to three-state secondary structure (Helix, Strand, Coil).
        The element is modified in place; gaps are left unchanged.
        """
        if self.type in (SecStructures.Helix, SecStructures.Helix3, SecStructures.PiHelix):
            self.type = SecStructures.Helix
        elif self.type in (SecStructures.Strand, SecStructures.BetaBridge):
            self.type = SecStructures.Strand
        elif self.type in (SecStructures.Coil, SecStructures.Turn, SecStructures.Bend):
            self.type = SecStructures.Coil
        elif self.type == SecStructures.Gap or self.type is None:
            pass
        else:
            assert False, 'Unhandled SS type: ' + repr(self.type)
2018
2019 -class SecondaryStructure(csb.core.CollectionContainer):
2020 """ 2021 Describes the secondary structure of a chain. 2022 Provides an index-based access to the secondary structure elements of the chain. 2023 2024 @param string: a secondary structure string (e.g. a PSI-PRED output) 2025 @type string: str 2026 @param conf_string: secondary structure prediction confidence values, if available 2027 @type conf_string: str 2028 """
def __init__(self, string=None, conf_string=None):
    """
    Create an empty, 1-based collection of L{SecondaryStructureElement}s
    and, if C{string} is given, fill it with the elements parsed from it.
    """
    super(SecondaryStructure, self).__init__(type=SecondaryStructureElement, start_index=1)

    # leftmost start and rightmost end over all appended elements
    self._minstart = None
    self._maxend = None

    if string is None:
        return
    for element in SecondaryStructure.parse(string, conf_string):
        self.append(element)
2039
def __str__(self):
    """
    String form of the secondary structure: the result of C{to_string}
    called with its default arguments.
    """
    text = self.to_string()
    return text
2042
def append(self, element):
    """
    Add a new SecondaryStructureElement. Then sort all elements by
    their start position and update the overall start/end boundaries.

    @param element: the element to add
    @type element: L{SecondaryStructureElement}
    """
    super(SecondaryStructure, self).append(element)
    super(SecondaryStructure, self)._sort()

    if self._minstart is None or element.start < self._minstart:
        self._minstart = element.start
    if self._maxend is None or element.end > self._maxend:
        self._maxend = element.end

@staticmethod
def parse(string, conf_string=None):
    """
    Parse secondary structure from DSSP/PSI-PRED output string. Each run
    of identical characters becomes one element. All whitespace is removed
    from both strings before parsing.

    @param string: a secondary structure string (e.g. a PSI-PRED output)
    @type string: str
    @param conf_string: secondary structure prediction confidence values, if
                        available: one digit per position
    @type conf_string: str

    @return: a list of L{SecondaryStructureElement}s.
    @rtype: list

    @raise TypeError: if C{string} is not a string
    @raise ValueError: if the confidence string is not of the same length,
                       or C{string} is empty
    @raise UnknownSecStructureError: if a character is not a valid
                                     L{SecStructures} value
    """
    if not isinstance(string, csb.core.string):
        raise TypeError(string)

    string = ''.join(re.split(r'\s+', string))
    if conf_string is not None:
        conf_string = ''.join(re.split(r'\s+', conf_string))
        if not len(string) == len(conf_string):
            raise ValueError('The confidence string has unexpected length.')
    motifs = [ ]

    if not len(string) > 0:
        raise ValueError('Empty Secondary Structure string')

    currel = string[0]
    start = 0

    # A '.' sentinel is appended, so that the last run gets flushed too.
    # NOTE(review): a trailing run of '.' characters in the input equals
    # the sentinel and is therefore never flushed - confirm this is intended.
    for i, char in enumerate(string + '.'):

        if currel != char:
            # the run [start, i) has just ended: emit it as 1-based start+1..i
            try:
                type = csb.core.Enum.parse(SecStructures, currel)
            except csb.core.EnumValueError:
                raise UnknownSecStructureError(currel)
            confidence = None
            if conf_string is not None:
                confidence = list(conf_string[start : i])
                confidence = list(map(int, confidence))
            motif = SecondaryStructureElement(start + 1, i, type, confidence)
            motifs.append(motif)

            currel = char
            start = i

    return motifs

@property
def start(self):
    """
    Start position of the leftmost element
    @rtype: int
    """
    return self._minstart

@property
def end(self):
    """
    End position of the rightmost element
    @rtype: int
    """
    return self._maxend
2121
def clone(self):
    """
    Duplicate this object together with everything it contains.

    @return: a deep copy of the object
    """
    duplicate = copy.deepcopy(self)
    return duplicate
2127
def to_three_state(self):
    """
    Convert to three-state secondary structure (Helix, Strand, Coil) by
    simplifying every element of the collection in place.
    """
    for element in self:
        element.simplify()
2134
def to_string(self, chain_length=None):
    """
    Get back the string representation of the secondary structure.
    Positions which are not covered by any element are rendered as gaps.

    @param chain_length: number of positions to render; defaults to the
                         end position of the rightmost element (an empty
                         secondary structure yields an empty string)
    @type chain_length: int

    @return: a string of secondary structure elements
    @rtype: str

    @bug: [CSB 0000003] If conflicting elements are found at a given rank,
          this position is represented as a coil.
    """
    gap = str(SecStructures.Gap)
    coil = str(SecStructures.Coil)

    if chain_length is None:
        ends = [e.end for e in self]
        # with no elements there is nothing to render
        chain_length = max(ends) if ends else 0

    ss = []

    for pos in range(1, chain_length + 1):
        elements = self.at(pos)
        if len(elements) > 0:
            if len(set(e.type for e in elements)) > 1:
                ss.append(coil)                         # [CSB 0000003]
            else:
                ss.append(elements[0].to_string())
        else:
            ss.append(gap)

    return ''.join(ss)
2164
def at(self, rank, type=None):
    """
    Shortcut for a loose, cutting C{scan} over the single position C{rank}.

    @param rank: the position of interest
    @type rank: int
    @param type: passed to C{scan} as its C{filter} argument
    @type type: L{csb.core.EnumItem}

    @return: all secondary structure elements covering the specified position
    @rtype: tuple of L{SecondaryStructureElement}s
    """
    covering = self.scan(start=rank, end=rank, filter=type, loose=True, cut=True)
    return covering
2171
def scan(self, start, end, filter=None, loose=True, cut=True):
    """
    Get all secondary structure elements within the specified [start, end] region.

    @param start: the start position of the region, 1-based, inclusive
    @type start: int
    @param end: the end position of the region, 1-based, inclusive
    @type end: int
    @param filter: return only elements of the specified L{SecStructures} kind
    @type filter: L{csb.core.EnumItem}
    @param loose: grab all fully or partially matching elements within the region.
                  if False, return only the elements which strictly reside within
                  the region
    @type loose: bool
    @param cut: if an element is partially overlapping with the start..end region,
                cut its start and/or end to make it fit into the region. If False,
                return the elements with their real lengths
    @type cut: bool

    @return: deep copies of the matching L{SecondaryStructureElement}s, sorted
             by their start position
    @rtype: tuple of L{SecondaryStructureElement}s
    """
    found = []

    for element in self:
        if filter and element.type != filter:
            continue

        if not loose:
            # strict mode: the element must lie entirely within the region
            if element.start >= start and element.end <= end:
                found.append(copy.deepcopy(element))
            continue

        # loose mode: the element starts in, ends in, or spans the region
        touches = (start <= element.start <= end
                   or start <= element.end <= end
                   or (element.start <= start and element.end >= end))
        if not touches:
            continue

        piece = copy.deepcopy(element)
        if cut:
            # trim the copy, never the stored element
            if piece.start < start:
                piece.start = start
            if piece.end > end:
                piece.end = end
            if piece.score:
                piece.score = piece.score[start : end + 1]
        found.append(piece)

    found.sort()
    return tuple(found)
2218
def q3(self, reference, relaxed=True):
    """
    Compute the Q3 score of this secondary structure against C{reference}.

    Both structures are cloned and reduced to three states first, so neither
    C{self} nor C{reference} is modified. Every rank between the start and
    the end of the reference is then compared.

    @param reference: reference secondary structure
    @type reference: L{SecondaryStructure}
    @param relaxed: if True, treat gaps as coils
    @type relaxed: bool

    @return: the percentage of C{reference} residues with identical
             3-state secondary structure (0.0 if no residue was counted)
    @rtype: float

    @raise ValueError: if more than one element covers the same rank in
                       either structure
    """
    query = self.clone()
    query.to_three_state()

    target = reference.clone()
    target.to_three_state()

    def single(ss, rank):
        # the only element covering rank, or None if the rank is uncovered
        covering = ss.at(rank)
        if len(covering) > 1:
            raise ValueError('Flat secondary structure expected')
        return covering[0] if len(covering) == 1 else None

    counted = 0
    matched = 0

    for rank in range(target.start, target.end + 1):
        q = single(query, rank)
        s = single(target, rank)

        if not s:
            continue
        if not (relaxed or s.type != SecStructures.Gap):
            # strict mode: reference gaps do not contribute to the score
            continue

        counted += 1

        if not q:
            continue

        if q.type == s.type:
            matched += 1
        elif relaxed:
            # relaxed mode: a gap paired with a coil counts as identical
            pair = {q.type, s.type}
            if pair <= {SecStructures.Gap, SecStructures.Coil}:
                matched += 1

    if counted == 0:
        return 0.0
    return matched * 100.0 / counted
2271
def subregion(self, start, end):
    """
    Same as C{ss.scan(...cut=True)}, but the start-end positions of all
    motifs are additionally shifted so that C{start} becomes position 1,
    and the result is a L{SecondaryStructure} instance instead of a tuple.

    @param start: start position of the subregion, with reference to the chain
    @type start: int
    @param end: end position of the subregion, with reference to the chain
    @type end: int

    @return: a deep-copy sub-fragment of the original L{SecondaryStructure}
    @rtype: L{SecondaryStructure}
    """
    fragment = SecondaryStructure()
    offset = start - 1

    for piece in self.scan(start, end, loose=True, cut=True):

        piece.start = piece.start - offset
        piece.end = piece.end - offset

        if piece.score:
            # re-assigning goes through the score setter, which is relied
            # upon to re-index the scores after the shift
            piece.score = list(piece.score)

        fragment.append(piece)

    return fragment
2296
2297 -class TorsionAnglesCollection(csb.core.CollectionContainer):
2298 """ 2299 Describes a collection of torsion angles. Provides 1-based list-like access. 2300 2301 @param items: an initialization list of L{TorsionAngles} 2302 @type items: list 2303 """
2304 - def __init__(self, items=None, start=1):
2308
def __repr__(self):
    """
    Short textual summary: the items at C{start_index} and C{last_index},
    or an 'empty' marker when the collection holds no items.

    @rtype: str
    """
    if len(self) == 0:
        return "<TorsionAnglesList: empty>"

    first = self[self.start_index]
    last = self[self.last_index]
    return "<TorsionAnglesList: {0} ... {1}>".format(first, last)
@property
def phi(self):
    """
    The C{phi} attribute of every item in the collection, in iteration order.

    @rtype: list
    """
    values = []
    for angles in self:
        values.append(angles.phi)
    return values
@property
def psi(self):
    """
    The C{psi} attribute of every item in the collection, in iteration order.

    @rtype: list
    """
    values = []
    for angles in self:
        values.append(angles.psi)
    return values
@property
def omega(self):
    """
    The C{omega} attribute of every item in the collection, in iteration order.

    @rtype: list
    """
    values = []
    for angles in self:
        values.append(angles.omega)
    return values
2338
def update(self, angles):
    """
    Public entry point that hands C{angles} over to C{self._update}.

    @param angles: the new content; presumably a list of L{TorsionAngles},
                   see C{_update} of the base container for the exact contract
    """
    self._update(angles)
2341
def rmsd(self, other):
    """
    Calculate the circular RMSD against another TorsionAnglesCollection.

    Only phi and psi are compared. Each pair of items is copied and converted
    to radians before use, so neither collection is modified.

    @param other: subject (right-hand-term)
    @type other: L{TorsionAnglesCollection}

    @return: RMSD based on torsion angles
    @rtype: float

    @raise Broken3DStructureError: on discontinuous torsion angle collections
    (phi and psi values are still allowed to be absent at the termini)
    @raise ValueError: on mismatching torsion angles collection lengths
    """
    size = len(self)

    if size != len(other) or size < 1:
        raise ValueError('Both collections must be of the same and positive length')

    def in_radians(angles):
        # work on a private copy, so that the caller's object keeps its units
        duplicate = angles.copy()
        duplicate.to_radians()
        return duplicate

    query, subject = [], []

    for n, (left, right) in enumerate(zip(self, other), start=1):

        q = in_radians(left)
        s = in_radians(right)

        if q.phi is None or s.phi is None:
            # phi may be absent only at the first position
            if n != 1:
                raise Broken3DStructureError('Discontinuous torsion angles collection at {0}'.format(n))
            q.phi = s.phi = 0.0

        if q.psi is None or s.psi is None:
            # psi may be absent only at the last position
            if n != size:
                raise Broken3DStructureError('Discontinuous torsion angles collection at {0}'.format(n))
            q.psi = s.psi = 0.0

        query.append([q.phi, q.psi])
        subject.append([s.phi, s.psi])

    return csb.bio.utils.torsion_rmsd(numpy.array(query), numpy.array(subject))
2386
class TorsionAngles(object):
    """
    A triple of phi, psi and omega backbone torsion angles, all expressed
    in the same units.

    Each supplied value must be either None, or fit into the range of
    [-180, +180] for AngleUnits.Degrees and [0, 2pi] for Radians.

    @param phi: phi angle value in C{units}
    @type phi: float
    @param psi: psi angle value in C{units}
    @type psi: float
    @param omega: omega angle value in C{units}
    @type omega: float
    @param units: any of L{AngleUnits}'s enum members
    @type units: L{csb.core.EnumItem}

    @raise ValueError: on invalid/unknown units
    """

    def __init__(self, phi, psi, omega, units=AngleUnits.Degrees):

        try:
            if isinstance(units, csb.core.string):
                # units given by name: resolve it to the enum member
                units = csb.core.Enum.parse(AngleUnits, units, ignore_case=True)
            elif units.enum is not AngleUnits:
                raise TypeError(units)

        except ValueError:
            raise ValueError('Unknown angle unit type {0}'.format(units))

        self._units = units
        self._phi = self._psi = self._omega = None

        # go through the property setters, so that each value is range-checked
        self.phi = phi
        self.psi = psi
        self.omega = omega

    def __repr__(self):
        template = "<TorsionAngles: phi={0.phi}, psi={0.psi}, omega={0.omega}>"
        return template.format(self)

    def __nonzero__(self):
        # Python 2 name of the truth-value hook
        return self.__bool__()

    def __bool__(self):
        # False only when none of the three angles is set
        return not (self.phi is None
                    and self.psi is None
                    and self.omega is None)

    @property
    def units(self):
        """
        Current torsion angle units - a member of L{AngleUnits}
        @rtype: enum item
        """
        return self._units

    @property
    def phi(self):
        return self._phi

    @phi.setter
    def phi(self, value):
        TorsionAngles.check_angle(value, self._units)
        self._phi = value

    @property
    def psi(self):
        return self._psi

    @psi.setter
    def psi(self, value):
        TorsionAngles.check_angle(value, self._units)
        self._psi = value

    @property
    def omega(self):
        return self._omega

    @omega.setter
    def omega(self, value):
        TorsionAngles.check_angle(value, self._units)
        self._omega = value

    def copy(self):
        """
        @return: a new L{TorsionAngles} with the same angle values and units
        """
        duplicate = TorsionAngles(self.phi, self.psi, self.omega, self.units)
        return duplicate

    def to_degrees(self):
        """
        Set angle measurement units to degrees.
        Convert the angles in this TorsionAngles instance to degrees.
        """

        if self._units != AngleUnits.Degrees:

            # convert everything first: if TorsionAngles.deg raises ValueError,
            # the instance is left untouched
            converted = [TorsionAngles.deg(value)
                         for value in (self._phi, self._psi, self._omega)]

            # assign to the private fields directly, to avoid check_angle
            # being invoked again by the setters
            self._phi, self._psi, self._omega = converted
            self._units = AngleUnits.Degrees

    def to_radians(self):
        """
        Set angle measurement units to radians.
        Convert the angles in this TorsionAngles instance to radians.
        """

        if self._units != AngleUnits.Radians:

            # convert everything first: if TorsionAngles.rad raises ValueError,
            # the instance is left untouched
            converted = [TorsionAngles.rad(value)
                         for value in (self._phi, self._psi, self._omega)]

            # assign to the private fields directly, to avoid check_angle
            # being invoked again by the setters
            self._phi, self._psi, self._omega = converted
            self._units = AngleUnits.Radians

    @staticmethod
    def check_angle(angle, units):
        """
        Check the value of a torsion angle expressed in the specified units.
        None is always accepted.

        @raise ValueError: if the angle is out of range for C{units}, or
                           C{units} is neither Degrees nor Radians
        """
        if angle is None:
            return

        if units == AngleUnits.Degrees:
            in_range = -180 <= angle <= 180
            if not in_range:
                raise ValueError('Torsion angle {0} is out of range -180..180'.format(angle))

        elif units == AngleUnits.Radians:
            in_range = 0 <= angle <= (2 * math.pi)
            if not in_range:
                raise ValueError('Torsion angle {0} is out of range 0..2pi'.format(angle))

        else:
            raise ValueError('Unknown angle unit type {0}'.format(units))

    @staticmethod
    def rad(angle):
        """
        Convert a torsion angle value, expressed in degrees, to radians.
        Negative angles are converted to their positive counterparts: rad(ang + 360deg).

        Return the calculated value in the range of [0, 2pi] radians.
        None is passed through unchanged.
        """
        TorsionAngles.check_angle(angle, AngleUnits.Degrees)

        if angle is None:
            return angle

        if angle < 0:
            angle += 360.
        return math.radians(angle)

    @staticmethod
    def deg(angle):
        """
        Convert a torsion angle value, expressed in radians, to degrees.
        Negative angles are not accepted, it is assumed that negative torsion angles have been
        converted to their ang+2pi counterparts beforehand.

        Return the calculated value in the range of [-180, +180] degrees.
        None is passed through unchanged.
        """
        TorsionAngles.check_angle(angle, AngleUnits.Radians)

        if angle is None:
            return angle

        if angle > math.pi:
            # map (pi, 2pi] back onto the negative half of the circle
            angle = -((2. * math.pi) - angle)
        return math.degrees(angle)
2561