Package csb :: Package bio :: Package io :: Module clans
[frames] | no frames]

Source Code for Module csb.bio.io.clans

   1  """ 
   2  Classes for parsing/manipulating/writing CLANS (by Tancred Frickey) files 
   3   
   4  This module defines L{ClansParser} and L{ClansFileWriter} for parsing and writing CLANS format files, respectively. 
   5  Further, class L{Clans} and several helper classes are used to hold and handle the parsed data. 
   6   
   7  The most commonly used CLANS data can be accessed in an L{Clans} instance via 
   8   - .entries <L{ClansEntryCollection} containing L{ClansEntry} instances> 
   9    - .name 
  10    - .seq <the amino acid sequence> 
  11    - .hsps <connections of this L{ClansEntry} to others> 
  12    - .groups <L{ClansSeqgroup}s the entry belongs to> 
  13   - .seqgroups <L{ClansSeqgroupCollection} containing L{ClansSeqgroup} instances> 
  14   - .params <L{ClansParams}> 
  15   
  16  Parse a file into L{Clans} instance C{clans_instance} by 
  17      >>> clans_instance = ClansParser().parse_file('input.clans') 
  18   
  19  Create a new entry C{e} with name \"C{my entry}\", sequence C{AAAA} and coordinates C{(x=1, y=1, z=1)} 
  20      >>> e = ClansEntry(name='my entry', seq='AAAA', coords=(1, 1, 1)) 
  21   
  22  and add it to an existing L{Clans} instance C{clans_instance} 
  23      >>> clans_instance.add_entry(e) 
  24   
  25  Entries can be accessed using indices of C{Clans} instances 
  26      >>> clans_instance[0]  # access to first entry 
  27   
  28  and deleted by 
  29      >>> clans_instance.remove_entry(e) 
  30   
  31  Equivalent functions exist for ClansSeqgroups. 
  32   
  33  Author: Klaus Kopec 
  34  MPI fuer Entwicklungsbiologie, Tuebingen 
  35  """ 
  36   
  37  import os 
  38  import re 
  39  import operator 
  40  import csb.core 
  41   
  42  from abc import ABCMeta, abstractmethod 
  43  from numpy import array, float64, eye, random 
class MissingBlockError(Exception):
    """
    Signals that a tag/block expected in a CLANS file was not found while
    the file was being parsed.
    """
51
class UnknownTagError(ValueError):
    """
    Signals that a tag which the parser does not know was encountered
    while a CLANS file was being parsed.
    """
58
class Color(object):
    """
    RGB color handling class.
    Color is stored as r, g, b, and a (i.e. alpha) attributes.
    Default color is C{r}=C{g}=C{b}=0 (i.e. black) with a=255

    @param r: the red value
    @type r: int

    @param g: the green value
    @type g: int

    @param b: the blue value
    @type b: int

    @param a: the alpha value
    @type a: int

    @raise ValueError: if any of the values is outside of range(256)
    """

    def __init__(self, r=0, g=0, b=0, a=255):
        # every assignment goes through the validating property setters
        self.r = r
        self.g = g
        self.b = b
        self.a = a

    def __repr__(self):
        return 'Color {0}'.format(self.to_clans_color())

    __str__ = __repr__

    @staticmethod
    def _check_channel(value):
        """
        Return C{value} unchanged if it is a valid channel value.

        All four channels share this check so that they reject the same
        inputs and report the same message.

        @raise ValueError: if C{value} is outside of range(256)
        """
        if value < 0 or value > 255:
            raise ValueError(
                'valid color values are in range(256), was \'{0}\''.format(
                    value))
        return value

    @staticmethod
    def from_string(color_string, separator=';'):
        """
        Factory for a Color instance created from a string formatted as
        r{separator}g{separator}b{separator}a, where the final
        '{separator}a' is optional.

        @param color_string: the color string
        @type color_string: str

        @param separator: the string that separates the channel values
        @type separator: str

        @rtype: L{Color}

        @raise TypeError: if C{color_string} is not a string
        @raise ValueError: if C{color_string} does not consist of 3 or 4
                           integers or if any value is outside of range(256)
        """
        if not isinstance(color_string, csb.core.string):
            raise TypeError('{0} is no string'.format(color_string))

        # split on the requested separator (not on a hard-coded ';'), so
        # that non-default separators actually work
        channels = color_string.split(separator)

        if len(channels) not in (3, 4):
            raise ValueError(
                ('format needs to be \'r{0}g{0}b\' but color_string was ' +
                 '{1} [optionally with alpha value: \'r{0}g{0}b{0}a\']').format(
                     separator, color_string))

        # with only three values, alpha falls back to the constructor default
        return Color(*map(int, channels))

    @property
    def r(self):
        """
        the red value of the RGB color.

        raises ValueError if C{value} is outside of range(256)

        @rtype: int
        """
        return self._r

    @r.setter
    def r(self, value):
        self._r = self._check_channel(value)

    @property
    def g(self):
        """
        the green value of the RGB color.

        raises ValueError if C{value} is outside of range(256)

        @rtype: int
        """
        return self._g

    @g.setter
    def g(self, value):
        self._g = self._check_channel(value)

    @property
    def b(self):
        """
        the blue value of the RGB color.

        raises ValueError if C{value} is outside of range(256)

        @rtype: int
        """
        return self._b

    @b.setter
    def b(self, value):
        self._b = self._check_channel(value)

    @property
    def a(self):
        """
        the alpha value of the RGB color.

        raises ValueError if C{value} is outside of range(256)

        @rtype: int
        """
        return self._a

    @a.setter
    def a(self, value):
        self._a = self._check_channel(value)

    def to_clans_color(self):
        """
        Formats the color for use in CLANS files.

        @return: the color formatted for use in CLANS files; format: r;g;b;a
        @rtype: str
        """
        return '{0.r};{0.g};{0.b};{0.a}'.format(self)
209
class ClansParser(object):
    """
    CLANS file format aware parser.

    A file is first cut into its <tag>DATA</tag> blocks (see
    L{_read_block_dict}); every known block is then handled by its own
    C{_parse_*} method and the results are combined into a L{Clans} instance
    by L{parse_file}.
    """

    def __init__(self):
        self._clans_instance = None
        self._data_block_dict = {}

    def __repr__(self):
        return 'ClansParser instance'

    __str__ = __repr__

    @property
    def clans_instance(self):
        """
        the L{Clans} instance that resulted from parsing a CLANS file.

        raises a ValueError if no CLANS file has been parsed yet

        @rtype: L{Clans} instance
        """
        if self._clans_instance is None:
            raise ValueError('you need to parse a CLANS file first')

        return self._clans_instance

    def _parse_optional(self, parse_block, permissive, default=None):
        """
        Run C{parse_block} and tolerate a missing block in permissive mode.

        @param parse_block: one of the C{_parse_*} methods
        @type parse_block: callable

        @param permissive: if True, a L{MissingBlockError} is swallowed and
                           C{default} is returned
        @type permissive: bool

        @param default: value returned for a tolerated missing block

        @return: the result of C{parse_block} or C{default}

        @raise MissingBlockError: if the block is missing and
                                  C{permissive == False}
        """
        try:
            return parse_block()
        except MissingBlockError:
            if not permissive:
                raise
            return default

    def parse_file(self, filename, permissive=True):
        """
        Create a L{Clans} instance by parsing the CLANS format file C{filename}

        @param filename: name of the CLANS file.
        @type filename: str

        @param permissive: if True, tolerate missing non-essential or unknown
        blocks.
        @type permissive: bool

        @rtype: L{Clans} instance
        @return: a L{Clans} instance containing the parsed data

        @raise MissingBlockError: if C{permissive == False} and one of the
        param, rotmtx, seq or pos blocks is missing
        @raise UnknownTagError: if C{permissive == False} and an unknown tag/
        data block is encountered
        """
        self._clans_instance = Clans()
        self._clans_instance._filename = filename

        self._read_block_dict()  # read and preprocess the CLANS file

        # param and rotmtx are non-essential and independent of each other:
        # each gets its own attempt, so that a missing <param> block does
        # not prevent an existing <rotmtx> block from being parsed
        self._parse_optional(self._parse_param, permissive)
        self._parse_optional(self._parse_rotmtx, permissive)

        seq = self._parse_optional(self._parse_seq, permissive, {})
        seqgroups = self._parse_seqgroups()
        pos = self._parse_optional(self._parse_pos, permissive, {})

        # connections are stored in exactly one of <hsp>, <att> or <mtx>;
        # the parse methods are only called for blocks known to be present
        hsp_att_mode = "hsp"
        hsp = {}
        if 'hsp' in self._data_block_dict:
            hsp = self._parse_hsp_att('hsp')
        elif 'att' in self._data_block_dict:
            hsp_att_mode = "att"
            hsp = self._parse_hsp_att('att')
        elif 'mtx' in self._data_block_dict:
            hsp = self._parse_mtx()

        ## raise UnknownTagError for unknown blocks
        known_block_tags = set(('param', 'rotmtx', 'seq', 'seqgroups', 'pos',
                                'hsp', 'mtx', 'att'))
        unprocessed_block_tags = set(self._data_block_dict.keys()).difference(
            known_block_tags)

        if len(unprocessed_block_tags) > 0 and not permissive:
            # sorted to get a reproducible error message
            raise UnknownTagError(
                ('tags unknown: {0}. File corrupt or further implementations '
                 + 'needed!').format(', '.join(sorted(unprocessed_block_tags))))

        ## if no entries exist, we cannot add pos, seqgroup and hsp data
        if len(seq) > 0:

            ## add Entries
            if len(pos) > 0:
                for i in pos:
                    self._clans_instance.add_entry(
                        ClansEntry(seq[i][0], seq[i][1], pos[i],
                                   parent=self._clans_instance))

            ## add groups
            self._clans_instance._seqgroups = ClansSeqgroupCollection()
            if len(seqgroups) > 0:
                for group_raw_data in seqgroups:

                    group = ClansSeqgroup(name=group_raw_data['name'],
                                          type=group_raw_data['type'],
                                          size=group_raw_data['size'],
                                          hide=group_raw_data['hide'] == '1',
                                          color=group_raw_data['color'])

                    ## get members corresponding to the IDs in this group
                    ## NOTE: this silently corrects files where a seqgroup
                    ##       contains the same entry multiple times
                    members = [self._clans_instance.entries[number]
                               for number in set(group_raw_data['numbers'])]

                    self._clans_instance.add_group(group, members)

            ## add hsp values
            entries = self._clans_instance.entries
            for (a, b), value in hsp.items():
                entries[a].add_hsp(entries[b], value)

        self._clans_instance._hsp_att_mode = hsp_att_mode

        return self._clans_instance

    def _read_block_dict(self):
        """
        Extracts all <tag>DATA</tag> blocks from file
        self.clans_instance.filename and stores them in
        C{self._data_block_dict} in the form dict[tag] = list of DATA lines.
        """
        path = os.path.expanduser(self._clans_instance.filename)

        # context manager, so the handle is closed even if reading fails
        with open(path) as stream:
            content = stream.read()

        # remove the first line, i.e. sequence=SEQUENCE_COUNT
        data_blocks = content.split('\n', 1)[1]

        ## flag re.DOTALL is necessary to make . match newlines
        data = re.findall(r'(<(\w+)>(.+)</\2>)', data_blocks,
                          flags=re.DOTALL)
        self._data_block_dict = dict([(tag, datum.strip().split('\n'))
                                      for _tag_plus_data, tag, datum in data])

    def _parse_param(self):
        """
        Parse a list of lines in the CLANS <param> format:

        parameter1=data1
        parameter2=data2
        ...

        The result is stored in the clans_instance as L{ClansParams}.

        @raise MissingBlockError: if the file contains no <param> block
        """
        if 'param' not in self._data_block_dict:
            raise MissingBlockError('file contains no <param> block.')

        block = self._data_block_dict['param']

        # split on the first '=' only: values (e.g. paths with command line
        # options) may contain '=' themselves
        tmp_params = dict([line.split('=', 1) for line in block])

        ## create colors entry from colorcutoffs and colorarr
        colorcutoffs = [float(val) for val in
                        tmp_params.pop('colorcutoffs').strip(';').split(';')]
        colors = tmp_params.pop('colorarr').strip(':')
        colors = colors.replace('(', '').replace(')', '').split(':')
        colorarr = [Color(*map(int, color_definition))
                    for color_definition in
                    [color.split(';') for color in colors]]

        tmp_params['colors'] = tuple(zip(colorcutoffs, colorarr))

        ## convert 'true' and 'false' into Python bools
        for k, v in list(tmp_params.items()):
            if v == 'true':
                tmp_params[k] = True
            elif v == 'false':
                tmp_params[k] = False

        self._clans_instance._params = ClansParams(strict=False, **tmp_params)

    def _parse_rotmtx(self):
        """
        Parse a list of lines in the CLANS <rotmtx> format. The data is stored
        in the clans_instance as a 3x3 numpy.array.

        @raise MissingBlockError: if the file contains no <rotmtx> block
        @raise ValueError: if the rotmtx block does not contain exactly 3 lines
        """
        if 'rotmtx' not in self._data_block_dict:
            raise MissingBlockError('file contains no <rotmtx> block.')

        block = self._data_block_dict['rotmtx']

        if len(block) != 3:
            raise ValueError('CLANS <rotmtx> blocks comprise exactly 3 lines.')

        # each line ends with ';', hence only the first 3 fields are values
        self._clans_instance.rotmtx = array(
            [[float64(val) for val in line.split(';')[:3]] for line in block])

    def _parse_seq(self):
        """
        Parse a list of lines in the CLANS <seq> format, which are in FASTA
        format.

        @rtype: dict
        @return: dict with running numbers as key and 2-tuples (id, sequence)
                 as values

        @raise MissingBlockError: if the file contains no <seq> block
        """
        if 'seq' not in self._data_block_dict:
            raise MissingBlockError(
                'file contains no <seq> block. This is OK if the file does '
                + 'not contain any sequences.')

        block = self._data_block_dict['seq']
        if len(block) % 2 == 1:
            # last header has no sequence line; pad a copy instead of
            # modifying the stored block
            block = block + ['']

        # block[2 * i] is the header line ('>' is cut off), block[2 * i + 1]
        # the corresponding sequence
        return dict([(i, (block[2 * i][1:], block[2 * i + 1].strip()))
                     for i in range(len(block) // 2)])

    def _parse_seqgroups(self):
        """
        Parse a list of lines in the CLANS <seqgroup> format:

        name=name of the group
        type=0
        size=12
        hide=0
        color=255;204;51
        numbers=0;1;2;3;4;5;6;10;13
        ...

        @rtype: list
        @return: list of dicts (one for each group) with the tags (name, type,
                 size, hide, ...) as keys. The value of 'numbers' is a list
                 of ints, all other values are kept as strings. If the file
                 has no <seqgroups> block, an empty
                 L{ClansSeqgroupCollection} is returned instead.
        """
        if 'seqgroups' not in self._data_block_dict:
            return ClansSeqgroupCollection()

        block = self._data_block_dict['seqgroups']

        groups = []
        for line in block:
            # split on the first '=' only: group names may contain '='
            p, v = line.split('=', 1)
            if p == 'name':
                # a name line starts a new group
                groups.append({'name': v})
            elif p == 'numbers':
                # the list is terminated by ';', i.e. the last field is empty
                groups[-1][p] = [int(val) for val in v.split(';')[:-1]]
            else:
                groups[-1][p] = v
        return groups

    def _parse_pos(self):
        """
        Parse a list of lines in the CLANS <pos> format 'INT FLOAT FLOAT
        FLOAT'.

        @rtype: dict
        @return: a dict using the integers as keys and a numpy.array created
                 from the floats as values.

        @raise MissingBlockError: if the file contains no <pos> block
        """
        if 'pos' not in self._data_block_dict:
            raise MissingBlockError(
                'file contains no <pos> block. This is OK if the file does '
                + 'not contain any sequences.')

        positions = {}
        for line in self._data_block_dict['pos']:
            fields = line.split()
            positions[int(fields[0])] = array(
                [float64(val) for val in fields[1:]])
        return positions

    def _parse_hsp_att(self, mode):
        """
        Parse a list of lines in the CLANS <hsp> format 'INT INT:FLOAT' or
        in the CLANS <att> format 'INT INT FLOAT'.

        NOTE: some CLANS <hsp> lines contain more than one float; we omit the
        additional numbers

        @param mode: either "hsp" or "att" depending on the type of tag to be
        parsed
        @type mode: str

        @rtype: dict
        @return: a dict using 2-tuples of the two integers as keys and the
                 float as values

        @raise ValueError: if C{mode} is neither "hsp" nor "att"
        @raise MissingBlockError: if the file contains no <C{mode}> block
        """
        if mode not in ("hsp", "att"):
            raise ValueError('mode must be either "hsp" or "att"')

        if mode not in self._data_block_dict:
            raise MissingBlockError(
                ('file contains no <{0}> block. This is OK if the file does '
                 + 'not contain any sequences or if none of the contained '
                 + 'sequences have any connections.').format(mode))

        block = self._data_block_dict[mode]
        connections = {}

        if mode == "hsp":
            for line in block:
                ids, values = line.split(':')[:2]
                key = tuple([int(val) for val in ids.split()])
                # only the first value after the ':' is used
                connections[key] = float(values.split(' ')[0])
        else:
            for line in block:
                fields = line.split(' ')
                key = tuple([int(val) for val in fields[:2]])
                connections[key] = float(fields[2])

        return connections

    def _parse_mtx(self):
        """
        Parse a list of lines in the CLANS <mtx> format.

        @rtype: dict
        @return: a dict using 2-tuples of the two integers (row and column
                 index) as keys and the float as values; entries that are 0
                 are left out

        @raise MissingBlockError: if the file contains no <mtx> block
        """
        if 'mtx' not in self._data_block_dict:
            raise MissingBlockError(
                'file contains no <mtx> block. This is OK if the file does '
                + 'not contain any sequences or if none of the contained '
                + 'sequences have any connections.')

        connections = {}
        for i, line in enumerate(self._data_block_dict['mtx']):
            # every row is terminated by ';', i.e. the last field is empty
            for j, entry in enumerate(line.split(';')[:-1]):
                value = float(entry)
                if value != 0:
                    connections[(i, j)] = value
        return connections
552
class ClansFileBuilder(object):
    """
    Abstract base class for builders that produce a file in CLANS format.
    Following the Builder pattern, the file is assembled step by step via
    the C{add_*_block} methods, which concrete builders have to implement.

    @param output: output stream (this is where the product is constructed)
    @type output: stream

    @raise TypeError: if C{output} has no C{write} attribute
    """

    __metaclass__ = ABCMeta

    def __init__(self, output):
        # anything offering write() is accepted as a stream
        if hasattr(output, 'write'):
            self._out = output
        else:
            raise TypeError(output)

    @property
    def output(self):
        """
        Destination stream
        @rtype: stream
        """
        return self._out

    def write(self, text):
        """
        Write a chunk of text
        """
        self._out.write(text)

    def writeline(self, text):
        """
        Write a chunk of text and append a new line terminator
        """
        for chunk in (text, '\n'):
            self._out.write(chunk)

    @abstractmethod
    def add_param_block(self, block_data):
        """Append the <param> block; to be implemented by subclasses."""

    @abstractmethod
    def add_rotmtx_block(self, block_data):
        """Append the <rotmtx> block; to be implemented by subclasses."""

    @abstractmethod
    def add_seq_block(self, block_data):
        """Append the <seq> block; to be implemented by subclasses."""

    @abstractmethod
    def add_seqgroups_block(self, block_data):
        """Append the <seqgroups> block; to be implemented by subclasses."""

    @abstractmethod
    def add_pos_block(self, block_data):
        """Append the <pos> block; to be implemented by subclasses."""

    @abstractmethod
    def add_hsp_block(self, block_data):
        """Append the <hsp> block; to be implemented by subclasses."""
615
class ClansFileWriter(ClansFileBuilder):
    """
    Serializes a L{Clans} instance to a stream in CLANS format.

    @param output: the output stream
    @type output: stream
    """

    def __init__(self, output):
        super(ClansFileWriter, self).__init__(output)

    def serialize(self, clans_instance):
        """
        Writes all data of C{clans_instance} to the output stream: the
        sequences line first, followed by the param, rotmtx, seq, seqgroups,
        pos and hsp blocks (in this order).

        @param clans_instance: the source of the data to be serialized
        @type clans_instance: a L{Clans} instance
        """
        steps = (self.add_sequences_line,
                 self.add_param_block,
                 self.add_rotmtx_block,
                 self.add_seq_block,
                 self.add_seqgroups_block,
                 self.add_pos_block,
                 self.add_hsp_block)

        for step in steps:
            step(clans_instance)

    def add_sequences_line(self, clans_instance):
        """
        Appends the 'sequences=<#sequences>' line to the output.

        @param clans_instance: the source of the data to be serialized
        @type clans_instance: a L{Clans} instance
        """
        entry_count = len(clans_instance.entries)
        self.writeline('sequences={0}'.format(entry_count))

    def add_param_block(self, clans_instance):
        """
        Appends a <param>data</param> CLANS file block to the output. The
        text of the block is created by the params of C{clans_instance}.

        @param clans_instance: the source of the data to be serialized
        @type clans_instance: a L{Clans} instance
        """
        self.write(clans_instance.params._to_clans_param_block())

    def add_rotmtx_block(self, clans_instance):
        """
        Appends a <rotmtx>data</rotmtx> CLANS file block to the output.
        Nothing is written if C{clans_instance.rotmtx} is None.

        @param clans_instance: the source of the data to be serialized
        @type clans_instance: a L{Clans} instance

        @raise ValueError: if C{clans_instance.rotmtx} is no 3x3 numpy.array
        """
        matrix = clans_instance.rotmtx

        if matrix is None:
            return

        if matrix.shape != (3, 3):
            raise ValueError('rotmtx must be a 3x3 array')

        rows = ['{0};{1};{2};'.format(*tuple(row)) for row in matrix]

        self.writeline('<rotmtx>')
        self.write('\n'.join(rows))
        self.write('\n')
        self.writeline('</rotmtx>')

    def add_seq_block(self, clans_instance):
        """
        Appends a <seq>data</seq> CLANS file block to the output.

        @param clans_instance: the source of the data to be serialized
        @type clans_instance: a L{Clans} instance
        """
        sequences = [entry.output_string_seq()
                     for entry in clans_instance.entries]

        self.writeline('<seq>')
        self.write(''.join(sequences))
        self.writeline('</seq>')

    def add_seqgroups_block(self, clans_instance):
        """
        Appends a <seqgroups>data</seqgroups> CLANS file block to the output.
        Nothing is written if C{clans_instance} has no seqgroups.

        @param clans_instance: the source of the data to be serialized
        @type clans_instance: a L{Clans} instance
        """
        seqgroups = clans_instance.seqgroups

        if seqgroups is None or len(seqgroups) == 0:
            return

        self.writeline('<seqgroups>')
        self.write('\n'.join([group.output_string() for group in seqgroups]))
        self.write('\n')
        self.writeline('</seqgroups>')

    def add_pos_block(self, clans_instance):
        """
        Appends a <pos>data</pos> CLANS file block to the output.

        @param clans_instance: the source of the data to be serialized
        @type clans_instance: a L{Clans} instance
        """
        positions = [entry.output_string_pos()
                     for entry in clans_instance.entries]

        self.writeline('<pos>')
        self.write('\n'.join(positions))
        self.write('\n')
        self.writeline('</pos>')

    def add_hsp_block(self, clans_instance):
        """
        Appends a <hsp>data</hsp> CLANS file block to the output.
        If the CLANS instance has hsp_att_mode=="att", an <att>data</att>
        block is written instead; its lines use ' ' where <hsp> lines use ':'.

        Each connection is written once, with the smaller entry id first.

        @param clans_instance: the source of the data to be serialized
        @type clans_instance: a L{Clans} instance
        """
        mode = clans_instance._hsp_att_mode

        if mode == "att":
            line_format = '{0} {1} {2}\n'
        else:
            line_format = '{0} {1}:{2}\n'

        self.writeline('<{0}>'.format(mode))

        ## sorting is not necessary, but makes a nicer looking clans file
        by_id = sorted([(entry.get_id(), entry)
                        for entry in clans_instance.entries],
                       key=operator.itemgetter(0))
        id_of = dict([(entry, identifier) for (identifier, entry) in by_id])

        for source_id, source in by_id:

            ## list of hsp targets of this entry, ordered by target id
            targets = sorted([(id_of[target], pvalue)
                              for (target, pvalue) in source.hsp.items()],
                             key=operator.itemgetter(0))

            for target_id, pvalue in targets:
                # write only the pairs with source_id < target_id
                if source_id < target_id:
                    self.write(
                        line_format.format(source_id, target_id,
                                           repr(pvalue)))

        self.writeline('</{0}>'.format(mode))
774
class ClansEntryGiComparator(object):
    """
    Comparator for two L{ClansEntry}s.
    Comparison is based on 'gi|' numbers and residue ranges parsed from
    L{ClansEntry}.name attributes if they can be parsed from it. Otherwise
    the complete name is used.

    Calling an instance with two entries returns True if the entries are
    considered identical and False otherwise.

    @raise ValueError: if a residue range contains no terminal residue
    """

    def __init__(self):
        self._mapping = {}  # mapping cache for faster access

    def _get_parsed(self, name):
        """
        Return the parsed form of C{name}; results are cached per name.
        """
        if name not in self._mapping:
            self._mapping[name] = self._parse_entry_name(name)
        return self._mapping[name]

    def __call__(self, entry1, entry2):
        """
        Compare two entries by their C{name} attributes.

        @rtype: bool
        @return: True if both names parse to the same value, or if both
                 carry the same gi number and their residue ranges overlap
                 sufficiently; else False
        """
        entry1_parsed = self._get_parsed(entry1.name)
        entry2_parsed = self._get_parsed(entry2.name)

        if entry1_parsed == entry2_parsed:
            return True

        # only (gi, start, end) tuples allow a range comparison. A plain
        # length check is not sufficient here, because unparsed names and gi
        # numbers are strings, which may have a length of 3 as well
        if not (isinstance(entry1_parsed, tuple)
                and isinstance(entry2_parsed, tuple)):
            return False

        gi_a, start_a, end_a = entry1_parsed
        gi_b, start_b, end_b = entry2_parsed

        if gi_a != gi_b:  # different gi numbers
            return False

        ## switch so that A is the one that starts earlier
        if start_a > start_b:
            start_a, end_a, start_b, end_b = start_b, end_b, start_a, end_a

        common_residues = end_a - start_b
        if common_residues < 0:
            return False  # B starts after A ends

        if end_b < end_a:
            return True  # A starts before B and ends after it => B is in A

        ## > 75% of length of the shorter one are shared => identical
        shorter = min(end_a - start_a, end_b - start_b)
        return common_residues > 0.75 * shorter

    def _parse_entry_name(self, name):
        """
        Extract the gi number and, if available, the residue range from
        C{name}.

        @param name: name of a L{ClansEntry}
        @type name: str

        @return: C{name} itself if it contains no 'gi|'; the gi number (str)
                 if no residue range of the form '(first-last)' or
                 '(first-last:...)' can be parsed; else the tuple
                 (gi number, first residue, last residue)

        @raise ValueError: if a residue range is started but neither ':' nor
                           ')' follows it
        """
        start = name.find('gi|')
        if start == -1:
            return name

        # drop everything up to and including the first 'gi|'
        name = name[start + 3:]
        gi_number = name.split('|', 1)[0]

        # only the part belonging to the first gi number is of interest;
        # the search has to run on the already shortened name, otherwise the
        # resulting index does not match the string that is cut
        next_gi_start = name.find('gi|')
        if next_gi_start != -1:
            name = name[:next_gi_start]

        initial_residue_number = name.find('(')
        if initial_residue_number == -1:
            return gi_number

        # text after the opening parenthesis, e.g. '5-20:7) ...'
        residues = name[initial_residue_number + 1:]

        ## if start is no integer, assume '(' is not the start of a range
        try:
            int(residues.split('-')[0])
        except ValueError:
            return gi_number

        ## some entries are not (x-y:z), but only (x-y): the range ends at
        ## whichever of ':' and ')' comes first
        range_ends = [index for index in (residues.find(':'),
                                          residues.find(')'))
                      if index != -1]
        if not range_ends:
            raise ValueError(
                'no end residue found in name\n\t{0}'.format(name))

        potential_start_and_end = residues[:min(range_ends)].split('-')

        if len(potential_start_and_end) != 2:
            return gi_number
        try:
            first_res, last_res = [int(val)
                                   for val in potential_start_and_end]
        except ValueError:
            return gi_number

        return (gi_number, first_res, last_res)
873
874 875 -class ClansParams(object):
876 """ 877 Class for handling L{Clans} parameters. 878 See L{ClansParams}._DEFAULTS for accepted parameter names. 879 880 @kwparam **kw: parameters as C{kw[parameter_name] = parameter_value} 881 882 @raise KeyError: if a supplied parameter name is not known 883 (i.e. it is not a key in _DEFAULTS) 884 """ 885 886 _DEFAULTS = {'attfactor': 10.0, 887 'attvalpow': 1, 888 'avgfoldchange': False, 889 'blastpath': 'blastall -p blastp', 890 'cluster2d': False, 891 'colors': ((0.0, (230, 230, 230)), 892 (0.1, (207, 207, 207)), 893 (0.2, (184, 184, 184)), 894 (0.3, (161, 161, 161)), 895 (0.4, (138, 138, 138)), 896 (0.5, (115, 115, 115)), 897 (0.6, (92, 92, 92)), 898 (0.7, (69, 69, 69)), 899 (0.8, (46, 46, 46)), 900 (0.9, (23, 23, 23))), 901 'complexatt': True, 902 'cooling': 1.0, 903 'currcool': 1.0, 904 'dampening': 0.2, 905 'dotsize': 2, 906 'formatdbpath': 'formatdb', 907 'groupsize': 4, 908 'maxmove': 0.1, 909 'minattract': 1.0, 910 'ovalsize': 10, 911 'pval': 1.0, 912 'repfactor': 5.0, 913 'repvalpow': 1, 914 'showinfo': True, 915 'usefoldchange': False, 916 'usescval': False, 917 'zoom': 1.0} 918
def __init__(self, strict=True, **kw):
    """
    Initialize all parameters with their defaults and overwrite them with
    the supplied values afterwards.

    @param strict: if True, names that are no key of _DEFAULTS are rejected
    @type strict: bool

    @raise KeyError: if C{strict} is True and a supplied parameter name is
                     not known
    """
    self.set_default_params()

    for key, val in kw.items():
        known = key in self._DEFAULTS
        if strict and not known:
            raise KeyError('parameter {0} (value: {1}) unknown'.format(
                key, val))
        # assignment via setattr, so that property setters are applied
        setattr(self, key, val)
@property
def complexatt(self):
    """
    whether complex attraction computations are used (True) or not (False).

    raises ValueError if set to non-boolean value

    @rtype: bool
    """
    return self._complexatt

@complexatt.setter
def complexatt(self, value):
    # only real booleans are accepted
    if isinstance(value, bool):
        self._complexatt = value
        return

    raise ValueError(
        'complexatt cannot be {0} (accepted values: True/False)'.format(
            value))
@property
def attfactor(self):
    """
    the factor used in the attractive force.

    raises ValueError if C{value} is not castable to float

    @rtype: float
    """
    return self._attfactor

@attfactor.setter
def attfactor(self, value):
    # stored as float, whatever castable type was supplied
    as_float = float(value)
    self._attfactor = as_float
@property
def attvalpow(self):
    """
    the exponent used in the attractive force.

    raises ValueError if C{value} is not castable to float

    @rtype: float
    """
    return self._attvalpow

@attvalpow.setter
def attvalpow(self, value):
    # stored as float, whatever castable type was supplied
    as_float = float(value)
    self._attvalpow = as_float
@property
def repfactor(self):
    """
    the factor used in the repulsive force.

    raises ValueError if C{value} is not castable to float

    @rtype: float
    """
    return self._repfactor

@repfactor.setter
def repfactor(self, value):
    # stored as float, whatever castable type was supplied
    as_float = float(value)
    self._repfactor = as_float
@property
def repvalpow(self):
    """
    the exponent used in the repulsive force.

    raises ValueError if C{value} is not castable to float

    @rtype: float
    """
    return self._repvalpow

@repvalpow.setter
def repvalpow(self, value):
    # stored as float, whatever castable type was supplied
    as_float = float(value)
    self._repvalpow = as_float
@property
def cluster2d(self):
    """
    whether clustering is done in 2D (True) or in 3D (False).

    raises ValueError if set to non-boolean value

    @rtype: bool
    """
    return self._cluster2d

@cluster2d.setter
def cluster2d(self, value):
    # only real booleans are accepted
    if isinstance(value, bool):
        self._cluster2d = value
        return

    raise ValueError(
        'cluster2d cannot be {0} (accepted values: True/False)'.format(
            value))
@property
def pval(self):
    """
    the p-value cutoff; it determines which connections are considered for
    the attractive force

    raises ValueError if C{value} is not castable to float

    @rtype: float
    """
    return self._pval

@pval.setter
def pval(self, value):
    # stored as float, whatever castable type was supplied
    as_float = float(value)
    self._pval = as_float
@property
def maxmove(self):
    """
    the maximal movement of a sequence (i.e. of a dot in the clustermap)
    per round

    raises ValueError if C{value} is not castable to float

    @rtype: float
    """
    return self._maxmove

@maxmove.setter
def maxmove(self, value):
    # stored as float, whatever castable type was supplied
    as_float = float(value)
    self._maxmove = as_float
@property
def usescval(self):
    """
    boolean parameter whose function is unclear. Check in Clans.

    raises ValueError if set to non-boolean value

    @rtype: bool
    """
    return self._usescval

@usescval.setter
def usescval(self, value):
    # only real booleans are accepted
    if isinstance(value, bool):
        self._usescval = value
        return

    raise ValueError(
        'usescval cannot be {0} (accepted values: True/False)'.format(
            value))
@property
def cooling(self):
    """
    float parameter whose function is unclear. Check in Clans.

    raises ValueError if C{value} is not castable to float

    @rtype: float
    """
    return self._cooling

@cooling.setter
def cooling(self, value):
    # stored as float, whatever castable type was supplied
    as_float = float(value)
    self._cooling = as_float
@property
def currcool(self):
    """
    float parameter whose function is unclear. Check in Clans.

    raises ValueError if C{value} is not castable to float

    @rtype: float
    """
    return self._currcool

@currcool.setter
def currcool(self, value):
    # stored as float, whatever castable type was supplied
    as_float = float(value)
    self._currcool = as_float
@property
def dampening(self):
    """
    float parameter whose function is unclear. Check in Clans.

    raises ValueError if C{value} is not castable to float

    @rtype: float
    """
    return self._dampening

@dampening.setter
def dampening(self, value):
    # stored as float, whatever castable type was supplied
    as_float = float(value)
    self._dampening = as_float
@property
def minattract(self):
    """
    float parameter whose function is unclear. Check in Clans.

    raises ValueError if C{value} is not castable to float

    @rtype: float
    """
    return self._minattract

@minattract.setter
def minattract(self, value):
    # stored as float, whatever castable type was supplied
    as_float = float(value)
    self._minattract = as_float
@property
def blastpath(self):
    """
    the path to the BLAST executable used for protein-protein comparisons.
    BLAST+ is currently not supported by Clans.

    raises ValueError if C{value} is not a string

    @rtype: str
    """
    return self._blastpath

@blastpath.setter
def blastpath(self, value):
    # only strings are accepted
    if isinstance(value, csb.core.string):
        self._blastpath = value
        return

    raise ValueError(
        'blastpath cannot be {0} (accepted values: strings)'.format(value))
@property
def formatdbpath(self):
    """
    the path to the formatdb executable of BLAST.

    raises ValueError if C{value} is not a string

    @rtype: str
    """
    return self._formatdbpath

@formatdbpath.setter
def formatdbpath(self, value):
    # only strings are accepted
    if isinstance(value, csb.core.string):
        self._formatdbpath = value
        return

    raise ValueError(
        'formatdbpath cannot be {0} (accepted values: strings)'.format(
            value))
@property
def showinfo(self):
    """
    If True, additional data (rotation matrix) is shown in the clustering
    window.

    Assigning a non-boolean value raises ValueError.

    @rtype: bool
    """
    return self._showinfo

@showinfo.setter
def showinfo(self, value):
    # strict check: truthy/falsy stand-ins such as 0/1 are rejected
    if isinstance(value, bool):
        self._showinfo = value
        return

    template = 'showinfo cannot be {0} (accepted values: True/False)'
    raise ValueError(template.format(value))
@property
def zoom(self):
    """
    Zoom value (1.0 == not zoomed).

    Assigned values are passed through C{float()}, so assigning something
    that cannot be converted raises the error C{float()} raises
    (ValueError for unparsable strings).

    @rtype: float
    """
    return self._zoom

@zoom.setter
def zoom(self, value):
    # conversion first, so a failing cast leaves the old value in place
    converted = float(value)
    self._zoom = converted
@property
def dotsize(self):
    """
    Size of the central dot representing each sequence in the clustermap.

    Assigned values are passed through C{int()}, so assigning something
    that cannot be converted raises the error C{int()} raises (ValueError
    for unparsable strings).

    @rtype: int
    """
    return self._dotsize

@dotsize.setter
def dotsize(self, value):
    # conversion first, so a failing cast leaves the old value in place
    converted = int(value)
    self._dotsize = converted
@property
def ovalsize(self):
    """
    Size of the circle around selected sequences.

    Assigned values are passed through C{int()}, so assigning something
    that cannot be converted raises the error C{int()} raises (ValueError
    for unparsable strings).

    @rtype: int
    """
    return self._ovalsize

@ovalsize.setter
def ovalsize(self, value):
    # conversion first, so a failing cast leaves the old value in place
    converted = int(value)
    self._ovalsize = converted
@property
def groupsize(self):
    """
    Default for the size of circles that mark newly created groups.

    Assigned values are passed through C{int()}, so assigning something
    that cannot be converted raises the error C{int()} raises (ValueError
    for unparsable strings).

    @rtype: int
    """
    return self._groupsize

@groupsize.setter
def groupsize(self, value):
    # conversion first, so a failing cast leaves the old value in place
    converted = int(value)
    self._groupsize = converted
@property
def usefoldchange(self):
    """
    CLANS parameter C{usefoldchange}; its function is not documented here
    (check in Clans).

    Assigning a non-boolean value raises ValueError.

    @rtype: bool
    """
    return self._usefoldchange

@usefoldchange.setter
def usefoldchange(self, value):
    # strict check: truthy/falsy stand-ins such as 0/1 are rejected
    if isinstance(value, bool):
        self._usefoldchange = value
        return

    template = 'usefoldchange cannot be {0} (accepted values: True/False)'
    raise ValueError(template.format(value))
@property
def avgfoldchange(self):
    """
    CLANS parameter C{avgfoldchange}; its function is not documented here
    (check in Clans).

    Assigning a non-boolean value raises ValueError.

    @rtype: bool
    """
    return self._avgfoldchange

@avgfoldchange.setter
def avgfoldchange(self, value):
    # strict check: truthy/falsy stand-ins such as 0/1 are rejected
    if isinstance(value, bool):
        self._avgfoldchange = value
        return

    template = 'avgfoldchange cannot be {0} (accepted values: True/False)'
    raise ValueError(template.format(value))
@property
def colors(self):
    """
    Colors that define the coloring for different p-values/attractions.

    Assigning anything other than a 10-tuple whose items all have length 2
    raises ValueError.

    @rtype: tuple
    """
    return self._colors

@colors.setter
def colors(self, value):
    if not isinstance(value, tuple):
        raise ValueError('colors must be a tuple')
    if len(value) != 10:
        raise ValueError('colors must be a 10-tuple')

    for item in value:
        # Items without a length (e.g. plain numbers) are reported with the
        # documented ValueError instead of leaking len()'s TypeError.
        try:
            item_length = len(item)
        except TypeError:
            item_length = None

        if item_length != 2:
            raise ValueError('each item of colors must be a 2-tuple')

    self._colors = value
1314
def set_default_params(self):
    """
    Sets the parameters to CLANS default values.
    See L{ClansParams}._DEFAULTS.
    """
    # Every default except 'colors' can be assigned as stored.
    for param_name, default in self._DEFAULTS.items():
        if param_name != 'colors':
            setattr(self, param_name, default)

    # 'colors' defaults are (cutoff, color arguments) pairs; the color
    # arguments are turned into Color instances before assignment.
    self.colors = tuple(
        [(cutoff, Color(*color_args))
         for cutoff, color_args in ClansParams._DEFAULTS['colors']])
1330
def _to_clans_param_block(self):
    """
    Creates a param block for a CLANS file from the L{ClansParams} values.

    @return: a CLANS file format <param>[data]</param> block
    @rtype: str
    """
    # parameters that are written as the words 'true'/'false'
    boolean_params = ('avgfoldchange', 'cluster2d', 'complexatt',
                      'showinfo', 'usefoldchange', 'usescval')

    param_dict = {}

    for param_name in sorted(ClansParams._DEFAULTS):
        if param_name == 'colors':
            ## divide 'colors' into 'colorcutoffs' and 'colorarr'
            cutoff_parts = ['{0:.2f};'.format(cutoff)
                            for cutoff, _ in self.colors]
            color_parts = ['({0}):'.format(color.to_clans_color())
                           for _, color in self.colors]

            param_dict['colorcutoffs'] = ''.join(cutoff_parts)
            param_dict['colorarr'] = ''.join(color_parts)

        elif param_name in boolean_params:
            # the boolean value indexes the pair: False -> 0, True -> 1
            param_dict[param_name] = ['false', 'true'][
                getattr(self, param_name)]

        else:
            param_dict[param_name] = getattr(self, param_name)

    body = '\n'.join(['{0}={1}'.format(key, param_dict[key])
                      for key in sorted(param_dict)])

    return '<param>\n' + body + '\n</param>\n'
1370
class ClansEntryCollection(csb.core.ReadOnlyCollectionContainer):
    """
    Read-only container for holding (and internally managing) L{ClansEntry}
    instances.
    """

    def __init__(self):
        super(ClansEntryCollection, self).__init__(type=ClansEntry)

    def _remove_item(self, item):
        """
        Removes {item} from the collection.

        @param item: the item to be removed
        @type item: a L{ClansEntry} instance

        @raises TypeError: if {item} is not a L{ClansEntry} instance
        """
        required_type = self._type

        # the type check only applies when the container has a type set
        if required_type and not isinstance(item, required_type):
            raise TypeError(
                "Item {0} is not of the required {1} type.".format(
                    item, required_type.__name__))

        self._items.remove(item)

    def _sort(self):
        """
        Sort entries by their {name}.

        Note: If the L{ClansEntryCollection} is part of a L{Clans} instance,
        use L{Clans.sort} instead of this to avoid corrupting L{Clans._idx}.
        """
        self._items.sort(key=operator.attrgetter('name'))
1406
1407 1408 -class ClansSeqgroupCollection(csb.core.ReadOnlyCollectionContainer):
1409 """ 1410 Read-only container for holding (and internally managing) L{ClansSeqgroup} 1411 instances. 1412 """ 1413
1414 - def __init__(self):
1417
def _remove_item(self, item):
    """
    Removes {item} from the collection.

    @param item: the item to be removed
    @type item: a L{ClansSeqgroup} instance

    @raises TypeError: if {item} is not a L{ClansSeqgroup} instance
    """
    required_type = self._type

    # the type check only applies when the container has a type set
    if required_type and not isinstance(item, required_type):
        raise TypeError(
            "Item {0} is not of the required {1} type.".format(
                item, required_type.__name__))

    self._items.remove(item)
1433
class Clans(object):
    """
    Class for holding and manipulating data from one CLANS file.
    Initialization is always done as empty clustermap with default parameters.
    """

    def __init__(self):
        self._filename = None

        self._params = ClansParams()

        self._rotmtx = None
        self.set_default_rotmtx()

        self._hsp_att_mode = "hsp"

        self._entries = ClansEntryCollection()
        self._seqgroups = ClansSeqgroupCollection()

        self._idx = None
        '''Index dict for fast access to entry positions'''
        self._has_good_index = False

    def __repr__(self):
        return 'Clans object: {0} sequences; {1} seqgroups'.format(
            len(self), len(self.seqgroups))

    __str__ = __repr__

    def __len__(self):
        return len(self.entries)

    def __getitem__(self, index):
        return self.entries[index]

    def __setitem__(self, index, data):
        self.entries[index] = data
        self._has_good_index = False

    @property
    def filename(self):
        """
        file from which the data was parsed

        @rtype: str or None
        """
        return self._filename

    @property
    def params(self):
        """
        L{ClansParams} that contains the parameters set for this L{Clans}
        instance.

        @rtype: L{ClansParams}
        """
        return self._params

    @property
    def rotmtx(self):
        """
        3x3 rotation matrix that indicates the rotation state of the clustermap

        raises ValueError if rotation matrix shape is not 3x3

        @rtype: numpy.array
        """
        return self._rotmtx

    @rotmtx.setter
    def rotmtx(self, value):
        if value.shape != (3, 3):
            raise ValueError('rotation matrix needs to be a 3x3 numpy array')
        self._rotmtx = value

    @property
    def entries(self):
        """
        collection of clustermap L{ClansEntry}s.

        @rtype: L{ClansEntryCollection}
        """
        return self._entries

    @property
    def seqgroups(self):
        """
        collection of L{ClansSeqgroup}s defined in the clustermap.

        @rtype: L{ClansSeqgroupCollection}
        """
        return self._seqgroups

    def set_default_rotmtx(self):
        """
        Resets the rotation matrix (rotmtx) to no rotation.
        """
        self.rotmtx = eye(3)

    def _update_index(self):
        """
        Creates an index of L{ClansEntry}s to their position in the L{Clans}
        instance.

        The index is used to allow for fast access via L{ClansEntry.get_id} and
        was introduced to get a better L{Clans}.write() performance, which
        suffered from excessive entry.get_id() calls during HSP block generation
        (see L{ClansFileWriter.add_hsp_block}).

        @attention: the index needs unique entry names. This is ensured with a
        call to L{Clans.remove_duplicates} and can decrease the number of
        entries!
        """
        self.remove_duplicates()

        self._idx = dict([(e._get_unique_id(), i)
                          for i, e in enumerate(self.entries)])
        self._has_good_index = True

    def sort(self):
        """
        Sorts the L{ClansEntry}s by their {name}.
        """
        self._entries._sort()

        # positions changed, so the cached index is stale
        self._has_good_index = False

    def add_group(self, group, members=None):
        """
        Adds a new group.

        NOTE(review): C{group} is not validated here; any type check is left
        to C{seqgroups._append_item}, whose behavior is not visible from this
        class.

        @param group: the new group
        @type group: L{ClansSeqgroup} instance

        @param members: L{ClansEntry} instances to be in the new group
        @type members: list
        """
        self.seqgroups._append_item(group)

        if members is not None:
            for member in members:
                group.add(member)

    def remove_group(self, group):
        """
        Removes a group and detaches all of its members from it.

        @param group: the group to be removed
        @type group: L{ClansSeqgroup} instance
        """
        self.seqgroups._remove_item(group)

        # group.remove() shrinks group.members, so walk over a snapshot;
        # iterating the live list would skip every other member.
        for member in list(group.members):
            group.remove(member)

    def add_entry(self, entry):
        """
        Adds an new entry.

        @param entry: the new entry
        @type entry: L{ClansEntry} instance

        @raise ValueError: if C{entry} is no L{ClansEntry} instance
        """
        if not isinstance(entry, ClansEntry):
            raise ValueError('entries need to be L{ClansEntry} instances')

        self.entries._append_item(entry)
        entry._parent = self

        self._has_good_index = False

    def remove_entry_by_name(self, entry_name):
        """
        Removes an entry fetched by its name.

        @param entry_name: name of the entry that shall be removed
        @type entry_name: string

        @raise ValueError: if no entry or multiple entries are named
                           C{entry_name} (see L{Clans.get_entry})
        """
        entry = self.get_entry(entry_name, True)

        self.remove_entry(entry)

    def remove_entry(self, entry):
        """
        Removes an entry together with its HSPs and group memberships.
        Groups that are empty afterwards are removed as well.

        @param entry: the entry that shall be removed
        @type entry: L{ClansEntry} instance
        """
        # remove_hsp() pops from entry.hsp as well, so iterate a snapshot of
        # the keys; a live dict view raises RuntimeError on Python 3.
        for other_entry in list(entry.hsp):
            other_entry.remove_hsp(entry)

        # g.remove() also edits entry.groups, hence the snapshot here too;
        # iterating the live list would leave some memberships behind.
        for g in list(entry.groups):
            g.remove(entry)

        remove_groups = [g for g in self.seqgroups if g.is_empty()]
        for g in remove_groups:
            self.seqgroups._remove_item(g)

        self.entries._remove_item(entry)
        self._has_good_index = False

    def get_entry(self, name, pedantic=True):
        """
        Checks if an entry with name C{name} exists and returns it.

        @param name: name of the sought entry
        @type name: str

        @param pedantic: If True, a ValueError is raised if multiple entries
                         with name name are found. If False, returns the first
                         one.
        @type pedantic: bool

        @raise ValueError: if no entry with name C{name} is found
        @raise ValueError: if multiple entries with name C{name} are found and
        C{pedantic == True}

        @rtype: L{ClansEntry}
        @return: entry with name C{name}
        """
        hits = [e for e in self.entries if e.name == name]

        if not hits:
            raise ValueError('ClansEntry {0} does not exist.'.format(name))

        if len(hits) > 1 and pedantic:
            raise ValueError(
                'multiple entries have name \'{0}\''.format(name))

        return hits[0]

    def remove_duplicates(self, identity_function=None):
        """
        Determines and removes duplicates using C{identity_function}.

        @param identity_function: callable to compare two L{ClansEntry}s as
        parameters. Defaults to L{ClansEntryGiComparator}.
        @type identity_function: callable

        @return: the removed entries
        @rtype: list of L{ClansEntry}s
        """
        if identity_function is None:
            identity_function = ClansEntryGiComparator()

        # Compare every entry with all entries after it; the later entry of
        # a matching pair is the one that gets removed.
        duplicates = set()
        for i, e in enumerate(self.entries):
            for e2 in self.entries[i + 1:]:
                if identity_function(e, e2):
                    duplicates.add(e2)

        remove_us = list(duplicates)

        for e in remove_us:
            self.remove_entry(e)

        return remove_us

    def restrict_to_max_pvalue(self, cutoff):
        """
        Removes all L{ClansEntry}s that have no HSP with a value below
        C{cutoff}, i.e. entries without HSPs or whose smallest HSP value is
        >= C{cutoff}.

        @param cutoff: the cutoff
        @type cutoff: float

        @return: the removed entries
        @rtype: list of L{ClansEntry}s
        """
        removed_entries = []  # all removed entries go here

        ## loop to hit entries that have no HSPs left after the previous round
        while True:
            remove_us = []  # entries removed this round
            for entry in self.entries:
                hsp_values = entry.hsp.values()
                if len(hsp_values) == 0 or min(hsp_values) >= cutoff:
                    remove_us.append(entry)
                    removed_entries.append(entry)

            if not remove_us:
                break

            for e in remove_us:
                if e in self:
                    self.remove_entry(e)

        return removed_entries

    def restrict(self, keep_names):
        """
        Removes all entries whose name is not in keep_names

        @param keep_names: names of entries that shall be kept
        @type keep_names: iterable
        """
        # collect first: remove_entry() modifies self.entries
        unwanted = [e for e in self.entries if e.name not in keep_names]

        for entry in unwanted:
            self.remove_entry(entry)

    def write(self, filename):
        """
        writes the L{Clans} instance to a file in CLANS format

        @param filename: the target file\'s name
        @type filename: str
        """
        with open(filename, 'w') as stream:
            writer = ClansFileWriter(stream)
            writer.serialize(self)
1739
class ClansEntry(object):
    """
    Class holding the data of one CLANS sequence entry.

    @param name: the entry name
    @type name: str

    @param seq: the entry\'s amino acid sequence
    @type seq: str

    @param coords: coordinates in 3D space; random coordinates are drawn
                   if omitted
    @type coords: iterable with 3 items

    @param parent: parent of this entry
    @type parent: L{Clans} instance

    """

    def __init__(self, name=None, seq='', coords=None, parent=None):
        if coords is None:
            # each CLANS coord is -1.<x<1.
            coords = random.random(3) * 2 - 1

        self._name = name
        self._seq = seq
        self._coords = coords
        self._parent = parent

        self._groups = []
        self._hsp = {}

    def __repr__(self):
        if self.coords is None:
            position = 'NoCoordsSet'
        else:
            position = '({0:.2f}, {1:.2f}, {2:.2f})'.format(*self.coords)

        if self.groups:
            membership = 'groups: {0}'.format(
                ', '.join(g.name for g in self.groups))
        else:
            membership = 'not in a group'

        return 'ClansEntry "{0}": {1} '.format(
            self.name, '; '.join((position, membership)))

    @property
    def name(self):
        """
        name of the entry

        raises ValueError if C{value} is not a string

        @rtype: string
        """
        return self._name

    @name.setter
    def name(self, value):
        if isinstance(value, csb.core.string):
            self._name = value
            return

        template = 'name cannot be {0} (accepted values: strings)'
        raise ValueError(template.format(value))

    @property
    def seq(self):
        """
        protein sequence of the entry

        raises ValueError if C{value} is not a string

        @rtype: string
        """
        return self._seq

    @seq.setter
    def seq(self, value):
        if isinstance(value, csb.core.string):
            self._seq = value
            return

        template = 'seq cannot be {0} (accepted values: strings)'
        raise ValueError(template.format(value))

    @property
    def coords(self):
        """
        entry coordinates in 3D space

        raises ValueError if C{value} does not have exactly 3 items

        @rtype: iterable with 3 items
        """
        return self._coords

    @coords.setter
    def coords(self, value):
        if len(value) == 3:
            self._coords = value
            return

        template = ('coords cannot be {0} (accepted values: '
                    'iteratables with 3 items)')
        raise ValueError(template.format(value))

    @property
    def parent(self):
        """
        L{Clans} instance that parents this L{ClansEntry}

        @rtype: L{Clans}
        """
        return self._parent

    @property
    def groups(self):
        """
        L{ClansSeqgroup}s that contain the entry

        @rtype: list
        """
        return self._groups

    @property
    def hsp(self):
        """
        connections between this and other L{ClansEntry}s: a mapping from
        the other entry to the HSP value

        @rtype: dict
        """
        return self._hsp

    def get_id(self):
        """
        Returns the id of the current entry, i.e. its position in the index
        of its parent.

        Note: the first call to this method triggers L{Clans._update_index},
        which will make it appear slower than successive calls.

        @rtype: int
        @return: the entrys\' id is returned unless it has no parent in which
        case -1 is returned
        """
        owner = self.parent

        if owner is None:
            return -1

        # rebuild the parent's index lazily when it has been invalidated
        if not owner._has_good_index:
            owner._update_index()

        return owner._idx[self._get_unique_id()]

    def _get_unique_id(self):
        """
        Returns a >>more<< unique ID (however this is not guaranteed to be
        really unique) than get_id. This ID determines which entries are deemed
        duplets by L{Clans}.remove_duplicates.

        @rtype: str
        @return: a more or less unique id
        """
        return self.name + '<###>' + self.seq

    def add_hsp(self, other, value):
        """
        Creates an HSP from self to other with the given value. The value is
        stored on both entries.

        @param other: the other entry
        @type other: L{ClansEntry} instance

        @param value: the value of the HSP
        @type value: float
        """
        for source, target in ((self, other), (other, self)):
            source.hsp[target] = value

    def remove_hsp(self, other):
        """
        Removes the HSP between C{self} and C{other}; if none exists, does
        nothing.

        @param other: the other entry
        @type other: L{ClansEntry} instance
        """
        # pop with a default is a no-op when the key is absent
        self.hsp.pop(other, None)
        other.hsp.pop(self, None)

    def output_string_seq(self):
        """
        Creates the CLANS <seq> block format representation of the entry.

        @rtype: str
        @return: entrys\' representation in CLANS <seq> block format
        """
        return '>{0}\n{1}\n'.format(self.name, self.seq)

    def output_string_pos(self):
        """
        Create the CLANS <pos> block format representation of the entry.

        @rtype: str
        @return: entrys\' representation in CLANS <pos> block format
        """
        return '{0} {1:.8f} {2:.8f} {3:.8f}'.format(
            self.get_id(), *self.coords)

    def output_string_hsp(self):
        """
        Creates the CLANS <hsp> block format representation of the entry:
        one line per HSP of this entry.

        @rtype: str
        @return: entrys\' representation in CLANS <hsp> block format
        """
        hsp_lines = []
        for other, value in self.hsp.items():
            hsp_lines.append('{0} {1}:{2:.8f}'.format(
                self.get_id(), other.get_id(), value))

        return '\n'.join(hsp_lines)
1960
class ClansSeqgroup(object):
    """
    Class holding the data of one CLANS group (seqgroup).

    @kwparam name: name of the seqgroup
    @type name: string

    @kwparam type: symbol used to represent the seqgroup in the graphical
                   output
    @type type: int

    @kwparam size: size of the symbol used to represent the seqgroup in the
                   graphical output
    @type size: int

    @kwparam hide: if True, the seqgroup\'s symbols in the graphical output are
                   not drawn; default: False
    @type hide: bool

    @kwparam color: color of the seqgroup
    @type color: L{Color} or string formatted like \'x;y;z\'

    @kwparam members: list of members of this seqgroup
    @type members: list
    """

    def __init__(self, **kw):
        # (attribute, default) pairs; each backing field is created first
        # and then filled through its validating property setter.
        defaults = (('name', 'NO NAME'),
                    ('type', 0),
                    ('size', 4),
                    ('hide', False),
                    ('color', (255, 255, 255)))

        for attribute, fallback in defaults:
            setattr(self, '_' + attribute, None)
            setattr(self, attribute, kw.pop(attribute, fallback))

        self._members = []
        for member in kw.get('members', ()):
            self.add(member)

    def __repr__(self):
        template = ('ClansSeqgroup {0.name}: type: {0.type}; size: {0.size}; '
                    'hide: {0.hide}; color: {1}; #members: {2}')
        return template.format(
            self, self.color.to_clans_color(), len(self.members))

    def __len__(self):
        return len(self.members)

    @property
    def name(self):
        """
        name of the seqgroup

        raises ValueError if C{value} is no string

        @rtype: string
        """
        return self._name

    @name.setter
    def name(self, value):
        if isinstance(value, csb.core.string):
            self._name = value
            return

        raise ValueError('name must be a string')

    @property
    def type(self):
        """
        symbol used to represent the seqgroup in the graphical output

        Assigned values are passed through C{int()}; values it cannot convert
        raise the error C{int()} raises (ValueError for unparsable strings).

        @rtype: int
        """
        return self._type

    @type.setter
    def type(self, value):
        converted = int(value)
        self._type = converted

    @property
    def size(self):
        """
        size of the symbol used to represent the seqgroup in the graphical
        output

        Assigned values are passed through C{int()}; values it cannot convert
        raise the error C{int()} raises (ValueError for unparsable strings).

        @rtype: int
        """
        return self._size

    @size.setter
    def size(self, value):
        converted = int(value)
        self._size = converted

    @property
    def hide(self):
        """
        if True, the seqgroup\'s symbols in the graphical output are not drawn

        raises ValueError if C{value} is no bool

        @rtype: bool
        """
        return self._hide

    @hide.setter
    def hide(self, value):
        if isinstance(value, bool):
            self._hide = value
            return

        template = 'hide cannot be {0} (accepted values: True/False)'
        raise ValueError(template.format(value))

    @property
    def color(self):
        """
        color of the seqgroup

        Accepts a L{Color} instance, a string (handed to
        L{Color.from_string}) or a 3-item iterable of values castable to int.
        Raises ValueError for other sized values.

        @rtype: L{Color}
        """
        return self._color

    @color.setter
    def color(self, value, separator=';'):
        if isinstance(value, Color):
            # an existing Color instance is stored as it is
            self._color = value
        elif isinstance(value, csb.core.string):
            ## parse color from string in format 'r;g;b'
            self._color = Color.from_string(value)
        elif len(value) == 3:
            # 3-item iterables like (3, 5, 6)
            self._color = Color(*tuple(map(int, value)))
        else:
            raise ValueError('cannot parse color from \'{0}\''.format(value))

    @property
    def members(self):
        """
        the members of this seqgroup

        @rtype: list
        """
        return self._members

    def is_empty(self):
        """
        Checks if the group contains entries.

        @rtype: bool
        @return: True if the group contains no entries, else False.
        """
        return len(self) == 0

    def add(self, new_member):
        """
        Adds entry C{new_member} to this L{ClansSeqgroup} and registers the
        group in the entry\'s C{groups}.

        @param new_member: the member that shall be added to this
        L{ClansSeqgroup}
        @type new_member: L{ClansEntry} instance

        @raise TypeError: if C{new_member} is no L{ClansEntry} instance
        @raise ValueError: if C{new_member} is already contained in this
        L{ClansSeqgroup}
        """
        if not isinstance(new_member, ClansEntry):
            raise TypeError(
                'only ClansEntry instances can be added as group members')

        if new_member in self.members:
            raise ValueError(
                'entry {0.name} is already contained in this seqgroup'.format(
                    new_member))

        self.members.append(new_member)
        new_member.groups.append(self)

    def remove(self, member):
        """
        Removes L{ClansEntry} C{member} from this group and drops the group
        from the entry\'s C{groups}.

        @param member: the member to be removed
        @type member: a L{ClansEntry} instance

        @raise TypeError: if C{member} is no L{ClansEntry} instance
        @raise ValueError: if C{member} is not part of this L{ClansSeqgroup}
        """
        if not isinstance(member, ClansEntry):
            raise TypeError('argument must be a ClansEntry instance')

        if member not in self.members:
            raise ValueError(
                '"{0.name}" is not a member of this seqgroup'.format(member))

        self.members.remove(member)
        member.groups.remove(self)

    def output_string(self):
        """
        Creates the CLANS <seqgroup> block format representation of the
        group. Members are listed by their sorted ids.

        @rtype: str
        @return: entrys\' representation in CLANS <seqgroup> block format
        """
        member_ids = sorted([m.get_id() for m in self.members])
        numbers = ';'.join(map(str, member_ids)) + ';'

        template = ('name={0.name}\ntype={0.type}\nsize={0.size}\nhide={1}'
                    '\ncolor={2}\nnumbers={3}')
        return template.format(
            self, int(self.hide), self.color.to_clans_color(), numbers)
2190