Package csb :: Package apps :: Module bfit
[frames | no frames]

Source Code for Module csb.apps.bfit

  1  """ 
  2  Python application for robust structure superposition of two structures. 
  3  bfit models non-rigid displacements in protein ensembles with outlier-tolerant 
  4  probability distributions. 
  5  """ 
  6  import numpy 
  7   
  8  import csb.apps 
  9  import csb.bio.utils 
 10   
 11  from csb.bio.io.wwpdb import LegacyStructureParser 
 12  from csb.bio.utils import probabilistic_fit 
 13  from csb.statistics.scalemixture import ScaleMixture, GammaPrior, InvGammaPrior 
 14  from csb.statistics.scalemixture import GammaPosteriorMAP, InvGammaPosteriorMAP 
 15  from csb.bio.sequence import SequenceAlignment 
class ExitCodes(csb.apps.ExitCodes):
    """
    Exit statuses specific to bfit, added on top of the ones inherited
    from L{csb.apps.ExitCodes}.
    """

    # reported when a PDB input file cannot be read
    IO_ERROR = 2
    # reported when the two coordinate sets end up with different lengths
    INPUT_ERROR = 3
21
class AppRunner(csb.apps.AppRunner):
    """
    Command line front end of bfit: names L{BFitApp} as the application to
    launch and declares the arguments and options it accepts.
    """

    @property
    def target(self):
        """
        Application class executed by this runner.
        """
        return BFitApp

    def command_line(self):
        """
        Assemble the argument handler describing bfit's command line.

        The module docstring is passed to the handler together with the
        program name.

        @return: handler with two positional PDB paths and the optional
                 chain, scale mixture, alignment, output file, iteration
                 count and EM settings
        @rtype: L{csb.apps.ArgHandler}
        """
        handler = csb.apps.ArgHandler(self.program, __doc__)

        # Input structures
        handler.add_positional_argument(
            'pdb1', str, 'full path to the first structure')
        handler.add_positional_argument(
            'pdb2', str, 'full path to the second structure')

        # Optional arguments
        handler.add_scalar_option(
            'chain1', 'c', str, 'Chain of the first structure', default='A')
        handler.add_scalar_option(
            'chain2', 'd', str, 'Chain of the second structure', default='A')

        handler.add_scalar_option(
            'scalemixture', 's', str, 'Scale mixture distribution',
            default='student', choices=['student', 'k'])

        handler.add_scalar_option(
            'alignment', 'a', str,
            'Alignment in fasta format defining equivalent positions\n'
            'Assumes that chain1 is the first sequence of '
            'the alignment and chain2 the second sequence')

        handler.add_scalar_option(
            'outfile', 'o', str,
            'file to which the rotated second '
            'structure will be written',
            default='bfit.pdb')

        handler.add_scalar_option(
            'niter', 'n', int, 'Number of optimization steps', default=200)

        handler.add_boolean_option(
            'em', None, 'Use the EM algorithm for optimsation', default=False)

        return handler
73
class BFitApp(csb.apps.Application):
    """
    Python application for robust structure superposition of two protein structures
    """

    def main(self):
        """
        Superimpose the second structure onto the first and write the result.

        Steps, in order:
          1. parse both PDB files and take the CA coordinates of the
             requested chains;
          2. if an alignment was given, keep only the positions paired in
             its gap-free columns;
          3. compute a starting rotation and translation with
             L{csb.bio.utils.fit};
          4. for C{niter} rounds, re-estimate the scale mixture from the
             per-position distances and refit with L{probabilistic_fit}
             using the resulting scales;
          5. transform the second structure and write it to C{outfile}.

        Exits with L{ExitCodes.IO_ERROR} when an input file cannot be read
        and with L{ExitCodes.INPUT_ERROR} when the alignment yields no
        paired positions or the two coordinate sets differ in length.
        """
        try:
            r = LegacyStructureParser(self.args.pdb1).parse()
            m = LegacyStructureParser(self.args.pdb2).parse()
        except IOError as e:
            # IOError carries no 'value' attribute; reading it raised
            # AttributeError and masked the real problem, so format the
            # exception itself.
            self.exit('PDB file parsing failed\n' + str(e), ExitCodes.IO_ERROR)

        X = numpy.array(r[self.args.chain1].get_coordinates(['CA'], True))
        Y = numpy.array(m[self.args.chain2].get_coordinates(['CA'], True))

        if self.args.alignment is not None:
            # open() in a with-block replaces the Python 2-only file()
            # builtin and makes sure the handle is closed.
            try:
                with open(self.args.alignment) as stream:
                    fasta = stream.read()
            except IOError as e:
                self.exit('Alignment file could not be read\n' + str(e),
                          ExitCodes.IO_ERROR)

            # only the first two sequences are used: chain1, then chain2
            align = SequenceAlignment.parse(fasta)
            align = align[:2, :]

            # Columns are addressed from 1 to align.length; ranks are
            # shifted by one to index the coordinate arrays
            # (presumably 1-based residue ranks -- confirm against csb).
            matches = [[align.columns[i][0].rank - 1,
                        align.columns[i][1].rank - 1]
                       for i in range(1, align.length + 1)
                       if not align.gap_at(i)]

            if not matches:
                # An empty list becomes a 1-D array, so the column
                # indexing below would fail with an IndexError.
                self.exit('Alignment contains no gap-free columns',
                          ExitCodes.INPUT_ERROR)

            matches = numpy.array(matches)
            X = X[matches[:, 0], :]
            Y = Y[matches[:, 1], :]

        if len(X) != len(Y):
            self.exit('Structures are of different lengths,' +
                      ' please specify an alignment',
                      ExitCodes.INPUT_ERROR)

        # The command line restricts scalemixture to 'student' or 'k'.
        if self.args.scalemixture == 'student':
            prior = GammaPrior()
            if self.args.em:
                prior.estimator = GammaPosteriorMAP()

        elif self.args.scalemixture == 'k':
            prior = InvGammaPrior()
            if self.args.em:
                prior.estimator = InvGammaPosteriorMAP()

        # one scale per retained position
        mixture = ScaleMixture(scales=X.shape[0],
                               prior=prior, d=3)

        # starting transformation
        R, t = csb.bio.utils.fit(X, Y)

        # optimization cycle (the index itself is not needed)
        for _ in range(self.args.niter):
            # distance between each position of X and the transformed Y
            data = numpy.sum((X - numpy.dot(Y, numpy.transpose(R)) - t) ** 2,
                             axis=-1) ** (1. / 2)
            # update the scales from the current distances
            mixture.estimate(data)
            # update rotation and translation given the scales
            R, t = probabilistic_fit(X, Y, mixture.scales)

        m.transform(R, t)
        m.to_pdb(self.args.outfile)
# Launch bfit through its runner when the module is executed as a script.
if __name__ == '__main__':
    runner = AppRunner()
    runner.run()