Package csb :: Package bio :: Package io :: Module isites
[frames] | no frames]

Source Code for Module csb.bio.io.isites

  1  """ 
  2  I-Sites fragment library parser. 
  3   
  4  @deprecated: legacy module. 
  5  """ 
  6   
  7  import os 
  8  import re 
  9  import csb.io 
 10  import csb.bio.structure as structure 
 11  import csb.bio.fragments.isites as isites 
class Tags(object):
    """
    Enumeration of I-Sites flat file tags.

    Every constant is the literal keyword that opens a line of the
    corresponding kind in an I-Sites library file; its value is equal
    to its name.
    """

    # Tags read while parsing the library header
    LIBRARY = 'LIBRARY'
    REMARK = 'REMARK'
    CENTROID = 'CENTROID'

    # Tags read while parsing a single cluster entry
    CLUSTER = 'CLUSTER'
    FROM = 'FROM'
    CREATEDBY = 'CREATEDBY'
    MDACUT = 'MDACUT'
    DMECUT = 'DMECUT'
    PSEUDOCOUNT = 'PSEUDOCOUNT'
    LINEARFIT = 'LINEARFIT'
    COVARWEIGHT = 'COVARWEIGHT'
    PARADIGM = 'PARADIGM'

    # Tags that open a multi-line section of a cluster entry
    ANGLES = 'ANGLES'
    PROFILE = 'PROFILE'
    COVARTENSOR = 'COVARTENSOR'

    # Terminates a cluster entry
    END = 'END'
34
class ISitesParser(object):
    """
    Implements an I-Sites fragment library parser v.5.1+ (2008).

    The parser can be used as a context manager: on exit every stream it
    has opened and not yet released is closed.

    @param flatfile: input *.isl I-Sites library file name
    @type flatfile: str
    @param express: if True, speeds up the parser by ignoring the covariance tensors
    @type express: bool

    @raise IOError: when the source file cannot be found
    """

    # Order in which the 20 per-residue values appear in CENTROID and PROFILE rows
    _RESIDUES = 'ACDEFGHIKLMNPQRSTVWY'

    # REMARK payloads which mark the end of the library header
    _HEADER_END = ('===== start of library =====', '====== start of clusters =======')

    # An "i j" line which introduces one cell of the covariance tensor
    _DIMLINE = re.compile(r'^[0-9]+\s+[0-9]+\s*$')

    def __init__(self, flatfile, express=False):
        # Set up the stream registry first, so that __del__ remains safe
        # when the existence check below raises
        self._streams = []
        self._library = None

        if not os.path.exists(flatfile):
            raise IOError("Could not read file {0}".format(flatfile))

        self._flatfile = flatfile
        self.express = bool(express)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Returns None, so exceptions raised in the with-block propagate
        self._closeall()

    def __del__(self):
        self._closeall()

    def _closeall(self):
        """
        Close, on a best-effort basis, every stream which is still registered.
        Safe to call on a partially constructed instance.
        """
        streams = getattr(self, '_streams', None)
        if not streams:
            return

        for stream in streams:
            try:
                stream.close()
            except Exception:
                # Cleanup must never fail; there is nothing useful to do here
                pass

        del streams[:]

    def _newstream(self):
        """
        Open a new read-only stream on the flat file and register it for cleanup.
        """
        stream = open(self._flatfile, mode='r')
        self._streams.append(stream)
        return stream

    def _release(self, stream):
        """
        Close a stream obtained from L{_newstream} and unregister it.
        """
        try:
            stream.close()
        finally:
            if stream in self._streams:
                self._streams.remove(stream)

    @property
    def library(self):
        """
        Return the general properties of the library.
        Library Clusters are iterable, but read efficiently only on demand.
        """
        return self.parse()

    @property
    def clusters(self):
        """
        Efficient iterator over all L{Cluster} objects.

        Every access creates a new generator which reads from its own stream;
        the stream is released as soon as the generator is exhausted or closed.
        """
        stream = self._newstream()
        try:
            reader = csb.io.EntryReader(stream, Tags.CLUSTER, Tags.END)

            for entry in reader.entries():
                yield self.parse_entry(entry)
        finally:
            self._release(stream)

    def parseall(self):
        """
        Parse the whole library to end.

        @return: object representation of the library with all clusters pre-parsed
        @rtype: L{Library}
        """
        library = self.parse()
        library.clusters = list(self.clusters)
        return library

    def parse(self):
        """
        Parse I-sites library common/general properties. Clusters are not parsed,
        but can be fetched on demand while iterating over C{library.clusters}.

        @return: object representation of the library with a
                 bound clusters generator
        @rtype: L{Library}
        """

        library = isites.Library()
        library.centroids = []
        library.documentation = ''
        library.clusters = self.clusters

        stream = self._newstream()

        try:
            for line in stream:

                if line.startswith(Tags.REMARK):
                    if line[len(Tags.REMARK):].strip() in self._HEADER_END:
                        # The header is over, the rest of the file holds the clusters
                        break
                    library.documentation += line[len(Tags.REMARK) + 1:]

                elif line.startswith(Tags.LIBRARY):
                    fields = line.split()[1:]
                    if len(fields) > 1:
                        library.name, library.version = fields[0], fields[1]
                    else:
                        library.version = fields[0]

                elif line.startswith(Tags.CENTROID):
                    fields = line.split()
                    index = int(fields[1])
                    values = fields[2].split(',')

                    matrow = dict((aa, float(values[cn]))
                                  for cn, aa in enumerate(self._RESIDUES))

                    # This is because in the current version CENTROIDS appears twice by mistake
                    if index > len(library.centroids) - 1:
                        assert index == len(library.centroids), "Centroid offset indices are consecutive numbers, starting from 0."
                        library.centroids.append(matrow)
        finally:
            # Release the stream even when a malformed line aborts the parse
            self._release(stream)

        return library

    def parse_entry(self, entry):
        """
        Parse a single I-Sites entry.

        @param entry: complete text of one cluster entry
        @type entry: str

        @return: object representation of the entry
        @rtype: L{Cluster}
        """
        cluster = isites.Cluster()
        # A single shared iterator: the section loops below pull their own
        # lines from it, so the outer loop resumes right after each section
        lines = iter(entry.splitlines())

        in_profile = False
        in_tensor = False

        for line in lines:

            if line.startswith(Tags.CLUSTER):
                fields = line.split()[1:]
                cluster.id, cluster.motiflen, cluster.profilelen, cluster.overhang = map(int, fields)

            elif line.startswith(Tags.FROM):
                cluster.file = line.split()[1]

            elif line.startswith(Tags.CREATEDBY):
                cluster.program = line[len(Tags.CREATEDBY) + 1:].strip()

            elif line.startswith(Tags.MDACUT):
                cluster.mda = float(line.split()[1])

            elif line.startswith(Tags.DMECUT):
                cluster.dme = float(line.split()[1])

            elif line.startswith(Tags.PSEUDOCOUNT):
                cluster.pseudocount = float(line.split()[1])
                assert cluster.pseudocount > 0

            elif line.startswith(Tags.LINEARFIT):
                cluster.linearfit = tuple(map(float, line.split()[1:]))

            elif line.startswith(Tags.COVARWEIGHT):
                cluster.covarweight = float(line.split()[1])

            elif line.startswith(Tags.PARADIGM):
                fields = line.split()[1:]
                cluster.representative = isites.RepStructureFragment(fields[0], fields[1], int(fields[2]))
                if fields[1] == '_':
                    # An underscore stands for an empty chain identifier
                    cluster.representative.chain = ''

            elif line.startswith(Tags.ANGLES):
                # rn counts the angle rows consumed so far in this section
                for rn, subline in enumerate(lines):
                    if subline.startswith(Tags.PROFILE):
                        in_profile = True
                        break
                    elif subline.startswith(Tags.END):
                        break

                    fields = subline.split()
                    angles = tuple(map(float, fields[1:]))

                    torsion = structure.TorsionAngles(angles[0], angles[1], angles[2],
                                                      units=structure.AngleUnits.Degrees)
                    # NOTE(review): append() is expected to return the 1-based rank
                    # of the new item - confirm against the collection type
                    j = cluster.representative.angles.append(torsion)

                    assert rn == j - 1 == int(fields[0]), "Angle offsets in a cluster are consecutive numbers, starting at 0."

            elif line.startswith(Tags.PROFILE):
                in_profile = True

            elif in_profile:
                cluster.profile = isites.ProteinProfile(isites.ProteinProfile.BackgroundFreqs,
                                                        alpha=cluster.pseudocount)
                rn = -1
                # The current line is already the first row of the profile
                subline = line

                while True:
                    # NOTE(review): the line which terminates the profile is consumed
                    # here, so a CREATEDBY line in this position is not parsed
                    if subline.startswith(Tags.CREATEDBY) or subline.startswith(Tags.END):
                        in_profile = False
                        break
                    elif subline.startswith(Tags.COVARTENSOR):
                        in_tensor = True
                        in_profile = False
                        break

                    rn += 1
                    fields = subline.split()

                    assert rn == int(fields[0]), "ProteinProfile rows in a cluster are consecutive numbers," \
                        + " starting from 0 (cluster {0}, profile row {1}/{2}).".format(cluster.id, rn, fields[0])

                    # fields[0] is the row number, the residue values follow
                    column = dict((aa, float(fields[cn + 1]))
                                  for cn, aa in enumerate(self._RESIDUES))
                    cluster.profile.add_column(**column)

                    try:
                        subline = next(lines)
                    except StopIteration:
                        in_profile = False
                        break

                assert cluster.profilelen == cluster.profile.length

            elif line.startswith(Tags.COVARTENSOR):
                in_tensor = True

            elif in_tensor:
                if self.express:
                    # Express mode: skip the tensor, the rest of the entry is ignored
                    break

                motiflen = cluster.motiflen
                # motiflen x motiflen grid of independent, initially empty cells
                cluster.covariance = [[[] for _col in range(motiflen)]
                                      for _row in range(motiflen)]

                i = j = -1
                # The current line is already the first line of the tensor
                subline = line

                while True:
                    if subline.startswith(Tags.END):
                        in_tensor = False
                        break

                    if self._DIMLINE.match(subline):
                        # Cell coordinates are 1-based in the file
                        istr, jstr = subline.split()
                        i, j = int(istr) - 1, int(jstr) - 1
                        assert 0 <= i < motiflen and 0 <= j < motiflen, "Covariance is a [motiflen x motiflen] matrix."
                    else:
                        # A row of values which belongs to the most recent cell
                        values = list(map(float, subline.split()))
                        cluster.covariance[i][j].append(values)

                    try:
                        subline = next(lines)
                    except StopIteration:
                        in_tensor = False
                        break

            elif line.startswith(Tags.END):
                break

        return cluster
327