Package csb :: Package bio :: Package io :: Module clans
[frames] | no frames]

Source Code for Module csb.bio.io.clans

   1  """ 
   2  Classes for parsing/manipulating/writing CLANS (by Tancred Frickey) files 
   3   
   4  This module defines L{ClansParser} and L{ClansFileWriter} for parsing and writing CLANS format files, respectively. 
   5  Further, class L{Clans} and several helper classes are used to hold and handle the parsed data. 
   6   
   7  The most commonly used CLANS data can be accessed in a L{Clans} instance via 
   8   - .entries <L{ClansEntryCollection} containing L{ClansEntry} instances> 
   9    - .name 
  10    - .seq <the amino acid sequence> 
  11    - .hsps <connections of this L{ClansEntry} to others> 
  12    - .groups <L{ClansSeqgroup}s the entry belongs to> 
  13   - .seqgroups <L{ClansSeqgroupCollection} containing L{ClansSeqgroup} instances> 
  14   - .params <L{ClansParams}> 
  15   
  16  Parse a file into L{Clans} instance C{clans_instance} by 
  17      >>> clans_instance = ClansParser().parse_file('input.clans') 
  18   
  19  Create a new entry C{e} with name \"C{my entry}\", sequence C{AAAA} and coordinates C{(x=1, y=1, z=1)} 
  20      >>> e = ClansEntry(name='my entry', seq='AAAA', coords=(1, 1, 1)) 
  21   
  22  and add it to an existing L{Clans} instance C{clans_instance} 
  23      >>> clans_instance.add_entry(e) 
  24   
  25  Entries can be accessed using indices of C{Clans} instances 
  26      >>> clans_instance[0]  # access to first entry 
  27   
  28  and deleted by 
  29      >>> clans_instance.remove_entry(e) 
  30   
  31  Equivalent functions exist for ClansSeqgroups. 
  32   
  33  Author: Klaus Kopec 
  34  MPI fuer Entwicklungsbiologie, Tuebingen 
  35  """ 
  36   
  37  import os 
  38  import re 
  39  import operator 
  40  import csb.core 
  41   
  42  from abc import ABCMeta, abstractmethod 
  43  from numpy import array, float64, eye, random 
class DuplicateEntryNameError(Exception):
    """
    Signals that two entries share the same name; raised during
    L{Clans.get_entry}.
    """
51
class DuplicateEntryError(Exception):
    """
    Signals that two entries are identical in name, sequence, and
    coordinates; raised during L{Clans._update_index}.
    """
58
class MissingBlockError(Exception):
    """
    Signals that an expected tag was not found while parsing a CLANS file.
    """
65
class UnknownTagError(ValueError):
    """
    Signals that an unknown tag was encountered while parsing a CLANS file.
    """
72
class Color(object):
    """
    RGB color handling class.
    Color is stored as r, g, b, and a (i.e. alpha) attributes.
    Default color is C{r}=C{g}=C{b}=0 (i.e. black) with a=255

    @param r: the red value
    @type r: int

    @param g: the green value
    @type g: int

    @param b: the blue value
    @type b: int

    @param a: the alpha value
    @type a: int

    @raise ValueError: if any value is outside of range(256)
    """

    def __init__(self, r=0, g=0, b=0, a=255):
        # assign through the properties so every channel is range-checked
        self._r = None
        self.r = r
        self._g = None
        self.g = g
        self._b = None
        self.b = b
        self._a = None
        self.a = a

    def __repr__(self):
        return 'Color {0}'.format(self.to_clans_color())

    __str__ = __repr__

    @staticmethod
    def _check_channel(value):
        """
        Validate a single color channel value.

        @param value: the channel value to check
        @type value: int

        @return: C{value}, unchanged
        @rtype: int

        @raise ValueError: if C{value} is outside of range(256)
        """
        if value < 0 or value > 255:
            raise ValueError(
                'valid color values are in range(256), was \'{0}\''.format(
                    value))

        return value

    @staticmethod
    def from_string(color_string, separator=';'):
        """
        Factory for a Color instance created from a string formatted as
        r{separator}g{separator}b{separator}a, where the final
        \'{separator}a\' is optional.

        @param color_string: the color string
        @type color_string: str

        @param separator: the string separating the color values
        @type separator: str

        @rtype: L{Color}

        @raises TypeError: if C{color_string} is not a string
        @raises ValueError: if C{color_string} does not hold 3 or 4 values,
                            or any value in color is outside of range(256)
        """
        if not isinstance(color_string, csb.core.string):
            raise TypeError('{0} is no string'.format(color_string))

        if color_string.count(separator) not in (2, 3):
            raise ValueError(
                ('format needs to be \'r{0}g{0}b\' but color_string was ' +
                 '{1} [optionally with alpha value: \'r{0}g{0}b{0}a\']').format(
                     separator, color_string))

        # split on the requested separator (not a hard-coded ';'); with only
        # three values the alpha channel falls back to the default of 255
        channels = [int(value) for value in color_string.split(separator)]

        return Color(*channels)

    @property
    def r(self):
        """
        the red value of the RGB color.

        raises ValueError if C{value} is outside of range(256)

        @rtype: int
        """
        return self._r

    @r.setter
    def r(self, value):
        self._r = Color._check_channel(value)

    @property
    def g(self):
        """
        the green value of the RGB color.

        raises ValueError if C{value} is outside of range(256)

        @rtype: int
        """
        return self._g

    @g.setter
    def g(self, value):
        self._g = Color._check_channel(value)

    @property
    def b(self):
        """
        the blue value of the RGB color.

        raises ValueError if C{value} is outside of range(256)

        @rtype: int
        """
        return self._b

    @b.setter
    def b(self, value):
        self._b = Color._check_channel(value)

    @property
    def a(self):
        """
        the alpha value of the RGB color.

        raises ValueError if C{value} is outside of range(256)

        @rtype: int
        """
        return self._a

    @a.setter
    def a(self, value):
        self._a = Color._check_channel(value)

    def to_clans_color(self):
        """
        Formats the color for use in CLANS files.

        @return: the color formatted for use in CLANS files; format: r;g;b;a
        @rtype: str
        """
        return '{0.r};{0.g};{0.b};{0.a}'.format(self)
223
class ClansParser(object):
    """
    CLANS file format aware parser.
    """

    def __init__(self):
        self._clans_instance = None
        self._data_block_dict = {}

    def __repr__(self):
        return 'ClansParser instance'

    __str__ = __repr__

    @property
    def clans_instance(self):
        """
        the L{Clans} instance that resulted from parsing a CLANS file.

        raises a ValueError if no CLANS file has been parsed yet

        @rtype: L{Clans} instance
        """
        if self._clans_instance is None:
            raise ValueError('you need to parse a CLANS file first')

        return self._clans_instance

    def parse_file(self, filename, permissive=True):
        """
        Create a L{Clans} instance by parsing the CLANS format file C{filename}

        @param filename: name of the CLANS file.
        @type filename: str

        @param permissive: if True, tolerate missing or unknown blocks.
        @type permissive: bool

        @rtype: L{Clans} instance
        @return: a L{Clans} instance containing the parsed data

        @raise MissingBlockError: if C{permissive == False} and a <param>,
                                  <rotmtx>, <seq> or <pos> block is missing
        @raise UnknownTagError: if C{permissive == False} and an unknown tag/
                                data block is encountered
        """
        self._clans_instance = Clans()
        self._clans_instance._filename = filename

        self._read_block_dict()  # read and preprocess the CLANS file

        ## each block is handled on its own, so that e.g. a missing <param>
        ## block does not prevent an existing <rotmtx> block from being read
        self._parse_optional(self._parse_param, None, permissive)
        self._parse_optional(self._parse_rotmtx, None, permissive)
        seq = self._parse_optional(self._parse_seq, {}, permissive)
        seqgroups = self._parse_seqgroups()
        pos = self._parse_optional(self._parse_pos, {}, permissive)

        ## connections are taken from the first available of hsp/att/mtx
        hsp_att_mode = "hsp"
        hsp = {}
        if 'hsp' in self._data_block_dict:
            hsp = self._parse_hsp_att('hsp')
        elif 'att' in self._data_block_dict:
            hsp_att_mode = "att"
            hsp = self._parse_hsp_att('att')
        elif 'mtx' in self._data_block_dict:
            hsp = self._parse_mtx()

        ## raise UnknownTagError for unknown blocks
        known_block_tags = set(('param', 'rotmtx', 'seq', 'seqgroups', 'pos',
                                'hsp', 'mtx', 'att'))
        unprocessed_block_tags = set(self._data_block_dict).difference(
            known_block_tags)

        if unprocessed_block_tags and not permissive:
            raise UnknownTagError(
                ('tags unknown: {0}. File corrupt or further implementations '
                 + 'needed!').format(', '.join(sorted(unprocessed_block_tags))))

        ## if no entries exist, we cannot add pos, seqgroup and hsp data
        if len(seq) > 0:
            clans = self._clans_instance

            ## add entries in ascending sequence number, because groups and
            ## connections below address entries by that number
            for number in sorted(pos):
                name, sequence = seq[number]
                clans.add_entry(
                    ClansEntry(name, sequence, pos[number], parent=clans))

            ## add groups
            clans._seqgroups = ClansSeqgroupCollection()
            for group_raw_data in seqgroups:

                group = ClansSeqgroup(name=group_raw_data['name'],
                                      type=group_raw_data['type'],
                                      size=group_raw_data['size'],
                                      hide=group_raw_data['hide'] == '1',
                                      color=group_raw_data['color'])

                ## get members corresponding to the IDs in this group
                ## NOTE: this silently corrects files where a seqgroup
                ##       contains the same entry multiple times
                members = [clans.entries[number]
                           for number in sorted(set(group_raw_data['numbers']))]

                clans.add_group(group, members)

            ## add hsp values
            for (a, b), value in hsp.items():
                clans.entries[a].add_hsp(clans.entries[b], value)

        self._clans_instance._hsp_att_mode = hsp_att_mode

        return self._clans_instance

    def _parse_optional(self, parse, default, permissive):
        """
        Run one block parser and tolerate a missing block if permitted.

        @param parse: the bound block parsing method to call
        @type parse: callable

        @param default: value returned if the block is missing
        @param permissive: if True, a missing block is tolerated
        @type permissive: bool

        @return: the result of C{parse()}, or C{default} if the block is
                 missing and C{permissive} is True

        @raise MissingBlockError: if the block is missing and
                                  C{permissive == False}
        """
        try:
            return parse()
        except MissingBlockError:
            if not permissive:
                raise
            return default

    def _read_block_dict(self):
        """
        Extracts all <tag>DATA</tag> blocks from file
        self.clans_instance.filename and stores them in
        C{self._data_block_dict} in the form: dict[tag] = list of the
        stripped DATA lines.
        """
        path = os.path.expanduser(self._clans_instance.filename)
        with open(path) as stream:
            content = stream.read()

        # remove the first line, i.e. sequences=SEQUENCE_COUNT; partition
        # yields an empty remainder for a file without any further lines
        data_blocks = content.partition('\n')[2]

        ## flag re.DOTALL is necessary to make . match newlines
        data = re.findall(r'<(\w+)>(.+)</\1>', data_blocks, flags=re.DOTALL)
        self._data_block_dict = dict((tag, datum.strip().split('\n'))
                                     for tag, datum in data)

    def _parse_param(self):
        """
        Parse a list of lines in the CLANS <param> format:

        parameter1=data1\\n
        parameter2=data2\\n
        ...

        The result is stored in the clans_instance as L{ClansParams}.

        @raise MissingBlockError: if the file contains no <param> block
        """
        if 'param' not in self._data_block_dict:
            raise MissingBlockError('file contains no <param> block.')

        block = self._data_block_dict['param']

        ## split at the first '=' only: values may contain '=' themselves
        tmp_params = dict(line.split('=', 1) for line in block)

        ## create colors entry from colorcutoffs and colorarr
        colorcutoffs = [float(val) for val in
                        tmp_params.pop('colorcutoffs').strip(';').split(';')]
        colors = tmp_params.pop('colorarr').strip(':')
        colors = colors.replace('(', '').replace(')', '').split(':')
        colorarr = [Color(*map(int, color.split(';'))) for color in colors]

        tmp_params['colors'] = tuple(zip(colorcutoffs, colorarr))

        ## convert 'true' and 'false' into Python bools
        for k, v in list(tmp_params.items()):
            if v == 'true':
                tmp_params[k] = True
            elif v == 'false':
                tmp_params[k] = False

        self._clans_instance._params = ClansParams(strict=False, **tmp_params)

    def _parse_rotmtx(self):
        """
        Parse a list of lines in the CLANS <rotmtx> format. The data is stored
        in the clans_instance as a 3x3 numpy.array.

        @raise MissingBlockError: if the file contains no <rotmtx> block
        @raise ValueError: if the rotmtx block does not contain exactly 3 lines
        """
        if 'rotmtx' not in self._data_block_dict:
            raise MissingBlockError('file contains no <rotmtx> block.')

        block = self._data_block_dict['rotmtx']

        if len(block) != 3:
            raise ValueError('CLANS <rotmtx> blocks comprise exactly 3 lines.')

        self._clans_instance.rotmtx = array(
            [[float64(val) for val in line.split(';')[:3]] for line in block])

    def _parse_seq(self):
        """
        Parse a list of lines in the CLANS <seq> format, which are in FASTA
        format.

        @rtype: dict
        @return: dict with running numbers as key and 2-tuples (id, sequence)
                 as values

        @raise MissingBlockError: if the file contains no <seq> block
        """
        if 'seq' not in self._data_block_dict:
            raise MissingBlockError(
                'file contains no <seq> block. This is OK if the file does '
                + 'not contain any sequences.')

        block = self._data_block_dict['seq']
        if len(block) % 2 == 1:
            # pad on a copy, so the cached block is left untouched
            block = block + ['']

        # lines alternate between '>name' headers and sequences
        return dict((i, (block[2 * i][1:], block[2 * i + 1].strip()))
                    for i in range(len(block) // 2))

    def _parse_seqgroups(self):
        """
        Parse a list of lines in the CLANS <seqgroup> format:

        name=name of the group\\n
        type=0\\n
        size=12\\n
        hide=0\\n
        color=255;204;51\\n
        numbers=0;1;2;3;4;5;6;10;13\\n
        ...

        @rtype: list
        @return: list of dicts (one for each group) with the tags (name, type,
                 size, hide, ...) as keys. The value of 'numbers' is a list of
                 ints, all other values are kept as strings. If the file has
                 no <seqgroups> block, an empty L{ClansSeqgroupCollection}
                 is returned instead.
        """
        if 'seqgroups' not in self._data_block_dict:
            return ClansSeqgroupCollection()

        block = self._data_block_dict['seqgroups']

        groups = []
        for line in block:
            if not line.strip():
                continue

            p, v = line.split('=', 1)
            if p == 'name':
                groups.append({'name': v})
            elif p == 'numbers':
                # accept lists with and without a trailing ';'
                groups[-1][p] = [int(val) for val in v.split(';')
                                 if val.strip()]
            else:
                groups[-1][p] = v

        return groups

    def _parse_pos(self):
        """
        Parse a list of lines in the CLANS <pos> format \'INT FLOAT FLOAT
        FLOAT\'.

        @rtype: dict
        @return: a dict using the integers as keys and a numpy.array created
                 from the remaining floats as values.

        @raise MissingBlockError: if the file contains no <pos> block
        """
        if 'pos' not in self._data_block_dict:
            raise MissingBlockError(
                'file contains no <pos> block. This is OK if the file does '
                + 'not contain any sequences.')

        positions = {}
        for line in self._data_block_dict['pos']:
            fields = line.split()
            positions[int(fields[0])] = array(
                [float64(val) for val in fields[1:]])

        return positions

    def _parse_hsp_att(self, mode):
        """
        Parse a list of lines in the CLANS <hsp> format \'INT INT:FLOAT\'
        or <att> format \'INT INT FLOAT\'.

        NOTE: some CLANS <hsp> lines contain more than one float; we omit the
        additional numbers

        @param mode: either "hsp" or "att" depending on the type of tag to be
                     parsed
        @type mode: str

        @rtype: dict
        @return: a dict using 2-tuples of the two integers as keys and the
                 float as values

        @raise ValueError: if C{mode} is neither "hsp" nor "att"
        @raise MissingBlockError: if the file contains no block of type
                                  C{mode}
        """
        if mode not in ("hsp", "att"):
            raise ValueError('mode must be either "hsp" or "att"')

        if mode not in self._data_block_dict:
            raise MissingBlockError(
                ('file contains no <{0}> block. This is OK if the file does '
                 + 'not contain any sequences or if none of the contained '
                 + 'sequences have any connections.').format(mode))

        block = self._data_block_dict[mode]

        connections = {}
        if mode == "hsp":
            for line in block:
                numbers, values = line.split(':')[:2]
                # whitespace split tolerates a blank after the colon
                connections[tuple(int(val) for val in numbers.split())] = \
                    float(values.split()[0])
        else:
            for line in block:
                fields = line.split()
                connections[(int(fields[0]), int(fields[1]))] = \
                    float(fields[2])

        return connections

    def _parse_mtx(self):
        """
        Parse a list of lines in the CLANS <mtx> format, i.e. one matrix row
        of \';\'-separated floats per line. Zero entries are skipped.

        @rtype: dict
        @return: a dict using 2-tuples of the row and column index as keys
                 and the float as values

        @raise MissingBlockError: if the file contains no <mtx> block
        """
        if 'mtx' not in self._data_block_dict:
            raise MissingBlockError(
                'file contains no <mtx> block. This is OK if the file does '
                + 'not contain any sequences or if none of the contained '
                + 'sequences have any connections.')

        connections = {}
        for i, line in enumerate(self._data_block_dict['mtx']):
            # accept rows with and without a trailing ';'
            cells = [cell for cell in line.split(';') if cell.strip()]
            for j, cell in enumerate(cells):
                value = float(cell)
                if value != 0:
                    connections[(i, j)] = value

        return connections
566
class ClansFileBuilder(object):
    """
    Abstract base for classes that build a file in CLANS format.
    Declares the step-wise interface of the Builder pattern: one C{add_*}
    method per CLANS file block.

    @param output: output stream (this is where the product is constructed)
    @type output: stream

    @raise TypeError: if C{output} has no C{write} attribute
    """

    # NOTE: the __metaclass__ attribute is only honored by Python 2
    __metaclass__ = ABCMeta

    def __init__(self, output):
        if hasattr(output, 'write'):
            self._out = output
        else:
            raise TypeError(output)

    @property
    def output(self):
        """
        Destination stream
        @rtype: stream
        """
        return self._out

    def write(self, text):
        """
        Write a chunk of text
        """
        self._out.write(text)

    def writeline(self, text):
        """
        Write a chunk of text and append a new line terminator
        """
        emit = self._out.write
        emit(text)
        emit('\n')

    @abstractmethod
    def add_param_block(self, block_data):
        """
        Append the <param> block.
        """

    @abstractmethod
    def add_rotmtx_block(self, block_data):
        """
        Append the <rotmtx> block.
        """

    @abstractmethod
    def add_seq_block(self, block_data):
        """
        Append the <seq> block.
        """

    @abstractmethod
    def add_seqgroups_block(self, block_data):
        """
        Append the <seqgroups> block.
        """

    @abstractmethod
    def add_pos_block(self, block_data):
        """
        Append the <pos> block.
        """

    @abstractmethod
    def add_hsp_block(self, block_data):
        """
        Append the <hsp> block.
        """
629
class ClansFileWriter(ClansFileBuilder):
    """
    Serializes a L{Clans} instance to a stream in CLANS format.

    @param output: the output stream
    @type output: stream
    """

    def __init__(self, output):
        super(ClansFileWriter, self).__init__(output)

    def serialize(self, clans_instance):
        """
        Writes all data from C{clans_instance} in CLANS file format.

        @param clans_instance: the source of the data to be serialized
        @type clans_instance: a L{Clans} instance
        """
        steps = (self.add_sequences_line,
                 self.add_param_block,
                 self.add_rotmtx_block,
                 self.add_seq_block,
                 self.add_seqgroups_block,
                 self.add_pos_block,
                 self.add_hsp_block)

        for step in steps:
            step(clans_instance)

    def add_sequences_line(self, clans_instance):
        """
        Appends the \'sequences=<#sequences>\' line to the output.

        @param clans_instance: the source of the data to be serialized
        @type clans_instance: a L{Clans} instance
        """
        entry_count = len(clans_instance.entries)
        self.writeline('sequences={0}'.format(entry_count))

    def add_param_block(self, clans_instance):
        """
        Appends a <param>data</param> CLANS file block to the output.

        @param clans_instance: the source of the data to be serialized
        @type clans_instance: a L{Clans} instance
        """
        self.write(clans_instance.params._to_clans_param_block())

    def add_rotmtx_block(self, clans_instance):
        """
        Appends a <rotmtx>data</rotmtx> CLANS file block to the output.
        Nothing is written if the rotation matrix is None.

        @param clans_instance: the source of the data to be serialized
        @type clans_instance: a L{Clans} instance

        @raise ValueError: if clans_instance.rotmtx is no 3x3 numpy.array
        """
        rotmtx = clans_instance.rotmtx

        if rotmtx is None:
            return

        if rotmtx.shape != (3, 3):
            raise ValueError('rotmtx must be a 3x3 array')

        rows = ['{0};{1};{2};'.format(*tuple(rotmtx[i])) for i in range(3)]

        self.writeline('<rotmtx>')
        self.write('\n'.join(rows))
        self.write('\n')
        self.writeline('</rotmtx>')

    def add_seq_block(self, clans_instance):
        """
        Appends a <seq>data</seq> CLANS file block to the output.

        @param clans_instance: the source of the data to be serialized
        @type clans_instance: a L{Clans} instance
        """
        self.writeline('<seq>')

        sequences = [entry.output_string_seq()
                     for entry in clans_instance.entries]
        self.write(''.join(sequences))

        self.writeline('</seq>')

    def add_seqgroups_block(self, clans_instance):
        """
        Appends a <seqgroups>data</seqgroups> CLANS file block to the output.
        Nothing is written if there are no seqgroups.

        @param clans_instance: the source of the data to be serialized
        @type clans_instance: a L{Clans} instance
        """
        seqgroups = clans_instance.seqgroups

        if seqgroups is None or len(seqgroups) == 0:
            return

        self.writeline('<seqgroups>')

        groups = [group.output_string() for group in seqgroups]
        self.write('\n'.join(groups))
        self.write('\n')

        self.writeline('</seqgroups>')

    def add_pos_block(self, clans_instance):
        """
        Appends a <pos>data</pos> CLANS file block to the output.

        @param clans_instance: the source of the data to be serialized
        @type clans_instance: a L{Clans} instance
        """
        self.writeline('<pos>')

        positions = [entry.output_string_pos()
                     for entry in clans_instance.entries]
        self.write('\n'.join(positions))
        self.write('\n')

        self.writeline('</pos>')

    def add_hsp_block(self, clans_instance):
        """
        Appends a <hsp>data</hsp> CLANS file block to the output.
        If the CLANS instance has hsp_att_mode=="att" we add a
        <att>data</att> block instead, using \'INT INT FLOAT\' lines.

        @param clans_instance: the source of the data to be serialized
        @type clans_instance: a L{Clans} instance
        """
        mode = clans_instance._hsp_att_mode

        if mode == "att":
            line_format = '{0} {1} {2}\n'
        else:
            line_format = '{0} {1}:{2}\n'

        self.writeline('<{0}>'.format(mode))

        ## sorting is not necessary, but makes a nicer looking clans file
        entries_by_id = sorted(
            [(entry.get_id(), entry) for entry in clans_instance.entries],
            key=operator.itemgetter(0))
        id_of_entry = dict([(entry, identifier)
                            for (identifier, entry) in entries_by_id])

        for source_id, source in entries_by_id:

            ## sort list of hsp targets by id
            targets = sorted(
                [(id_of_entry[target], pvalue)
                 for (target, pvalue) in source.hsp.items()],
                key=operator.itemgetter(0))

            for target_id, pvalue in targets:
                ## each connection is written once, from lower to higher id
                if source_id < target_id:
                    self.write(
                        line_format.format(source_id, target_id, repr(pvalue)))

        self.writeline('</{0}>'.format(mode))
788
789 790 -class ClansParams(object):
791 """ 792 Class for handling L{Clans} parameters. 793 See L{ClansParams}._DEFAULTS for accepted parameter names. 794 795 @kwparam **kw: parameters as C{kw[parameter_name] = parameter_value} 796 797 @raise KeyError: if a supplied parameter name is not known 798 (i.e. it is not a key in _DEFAULTS) 799 """ 800 801 _DEFAULTS = {'attfactor': 10.0, 802 'attvalpow': 1, 803 'avgfoldchange': False, 804 'blastpath': 'blastall -p blastp', 805 'cluster2d': False, 806 'colors': ((0.0, (230, 230, 230)), 807 (0.1, (207, 207, 207)), 808 (0.2, (184, 184, 184)), 809 (0.3, (161, 161, 161)), 810 (0.4, (138, 138, 138)), 811 (0.5, (115, 115, 115)), 812 (0.6, (92, 92, 92)), 813 (0.7, (69, 69, 69)), 814 (0.8, (46, 46, 46)), 815 (0.9, (23, 23, 23))), 816 'complexatt': True, 817 'cooling': 1.0, 818 'currcool': 1.0, 819 'dampening': 0.2, 820 'dotsize': 2, 821 'formatdbpath': 'formatdb', 822 'groupsize': 4, 823 'maxmove': 0.1, 824 'minattract': 1.0, 825 'ovalsize': 10, 826 'pval': 1.0, 827 'repfactor': 5.0, 828 'repvalpow': 1, 829 'showinfo': True, 830 'usefoldchange': False, 831 'usescval': False, 832 'zoom': 1.0} 833
834 - def __init__(self, strict=True, **kw):
835 self.set_default_params() 836 837 for param_name, param_value in kw.items(): 838 if param_name not in self._DEFAULTS and strict: 839 raise KeyError('parameter {0} (value: {1}) unknown'.format( 840 param_name, param_value)) 841 self.__setattr__(param_name, param_value)
842 843 @property
844 - def complexatt(self):
845 """ 846 if True, complex attraction computations are used. 847 848 raises ValueError if set to non-boolean value 849 850 @rtype: bool 851 """ 852 return self._complexatt
853 854 @complexatt.setter
855 - def complexatt(self, value):
856 if not isinstance(value, bool): 857 raise ValueError(('complexatt cannot be {0} (accepted values: True' 858 + '/False)').format(value)) 859 self._complexatt = value
860 861 @property
862 - def attfactor(self):
863 """ 864 factor in the attractive force 865 866 raises ValueError if C{value} is not castable to float 867 868 @rtype: float 869 """ 870 return self._attfactor
871 872 @attfactor.setter
873 - def attfactor(self, value):
874 self._attfactor = float(value)
875 876 @property
877 - def attvalpow(self):
878 """ 879 exponent in the attractive force 880 881 raises ValueError if C{value} is not castable to float 882 883 @rtype: float 884 """ 885 return self._attvalpow
886 887 @attvalpow.setter
888 - def attvalpow(self, value):
889 self._attvalpow = float(value)
890 891 @property
892 - def repfactor(self):
893 """ 894 factor in the repulsive force 895 896 raises ValueError if C{value} is not castable to float 897 898 @rtype: float 899 """ 900 return self._repfactor
901 902 @repfactor.setter
903 - def repfactor(self, value):
904 self._repfactor = float(value)
905 906 @property
907 - def repvalpow(self):
908 """ 909 exponent in the repulsive force 910 911 raises ValueError if C{value} is not castable to float 912 913 @rtype: float 914 """ 915 return self._repvalpow
916 917 @repvalpow.setter
918 - def repvalpow(self, value):
919 self._repvalpow = float(value)
920 921 @property
922 - def cluster2d(self):
923 """ 924 if True, clustering is done in 2D. Else in 3D. 925 926 raises ValueError if set to non-boolean value 927 928 @rtype: bool 929 """ 930 return self._cluster2d
931 932 933 @cluster2d.setter
934 - def cluster2d(self, value):
935 if not isinstance(value, bool): 936 raise ValueError(('cluster2d cannot be {0} (accepted values: True' 937 + '/False)').format(value)) 938 939 self._cluster2d = value
940 941 @property
942 - def pval(self):
943 """ 944 p-value cutoff that determines which connections are considered for 945 the attractive force 946 947 raises ValueError if C{value} is not castable to float 948 949 @rtype: float 950 """ 951 return self._pval
952 953 @pval.setter
954 - def pval(self, value):
955 self._pval = float(value)
956 957 @property
958 - def maxmove(self):
959 """ 960 maximal sequence (i.e. dot in the clustermap) movement per round 961 962 raises ValueError if C{value} is not castable to float 963 964 @rtype: float 965 """ 966 return self._maxmove
967 968 @maxmove.setter
969 - def maxmove(self, value):
970 self._maxmove = float(value)
971 972 @property
973 - def usescval(self):
974 """ 975 parameter with unclear function. Check in Clans. 976 977 raises ValueError if set to non-boolean value 978 979 @rtype: bool 980 """ 981 return self._usescval
982 983 @usescval.setter
984 - def usescval(self, value):
985 if not isinstance(value, bool): 986 raise ValueError(('usescval cannot be {0} (accepted values: True' 987 + '/False)').format(value)) 988 989 self._usescval = value
990 991 @property
992 - def cooling(self):
993 """ 994 parameter with unclear function. Check in Clans. 995 996 raises ValueError if C{value} is not castable to float 997 998 @rtype: float 999 """ 1000 return self._cooling
1001 1002 @cooling.setter
1003 - def cooling(self, value):
1004 self._cooling = float(value)
1005 1006 @property
1007 - def currcool(self):
1008 """ 1009 parameter with unclear function. Check in Clans. 1010 1011 raises ValueError if C{value} is not castable to float 1012 1013 @rtype: float 1014 """ 1015 return self._currcool
1016 1017 @currcool.setter
1018 - def currcool(self, value):
1019 self._currcool = float(value)
1020 1021 @property
1022 - def dampening(self):
1023 """ 1024 parameter with unclear function. Check in Clans. 1025 1026 raises ValueError if C{value} is not castable to float 1027 1028 @rtype: float 1029 """ 1030 return self._dampening
1031 1032 @dampening.setter
1033 - def dampening(self, value):
1034 self._dampening = float(value)
1035 1036 @property
1037 - def minattract(self):
1038 """ 1039 parameter with unclear function. Check in Clans. 1040 1041 raises ValueError if C{value} is not castable to float 1042 1043 @rtype: float 1044 """ 1045 return self._minattract
1046 1047 @minattract.setter
1048 - def minattract(self, value):
1049 self._minattract = float(value)
1050 1051 @property
1052 - def blastpath(self):
1053 """ 1054 path to the BLAST executable for protein-protein comparisons. BLAST+ is 1055 currently not supported by Clans. 1056 1057 raises ValueError if C{value} is not a string 1058 1059 @rtype: str 1060 """ 1061 return self._blastpath
1062 1063 @blastpath.setter
1064 - def blastpath(self, value):
1065 if not isinstance(value, csb.core.string): 1066 raise ValueError(('blastpath cannot be {0} (accepted values: ' 1067 + 'strings)').format(value)) 1068 1069 self._blastpath = value
1070 1071 @property
1072 - def formatdbpath(self):
1073 """ 1074 path to the formatdb executable of BLAST. 1075 1076 raises ValueError if C{value} is not a string 1077 1078 @rtype: str 1079 """ 1080 return self._formatdbpath
1081 1082 @formatdbpath.setter
1083 - def formatdbpath(self, value):
1084 if not isinstance(value, csb.core.string): 1085 raise ValueError(('formatdbpath cannot be {0} (accepted values: ' 1086 + 'strings)').format(value)) 1087 1088 self._formatdbpath = value
1089 1090 @property
1091 - def showinfo(self):
1092 """ 1093 if True, additional data (rotation matrix) is shown in the clustring 1094 window) 1095 1096 raises ValueError if set to non-boolean value 1097 1098 @rtype: bool 1099 """ 1100 return self._showinfo
1101 1102 @showinfo.setter
1103 - def showinfo(self, value):
1104 if not isinstance(value, bool): 1105 raise ValueError(('showinfo cannot be {0} (accepted values: True' 1106 + '/False)').format(value)) 1107 1108 self._showinfo = value
1109 1110 @property
1111 - def zoom(self):
1112 """ 1113 zoom value (1.0 == not zoomed) 1114 1115 raises ValueError if C{value} is not castable to float 1116 1117 @rtype: float 1118 """ 1119 return self._zoom
1120 1121 @zoom.setter
1122 - def zoom(self, value):
1123 self._zoom = float(value)
1124 1125 @property
1126 - def dotsize(self):
1127 """ 1128 size of the central dot representing each sequence in the clustermap 1129 1130 raises ValueError if C{value} is not castable to int 1131 1132 @rtype: int 1133 """ 1134 return self._dotsize
1135 1136 @dotsize.setter
1137 - def dotsize(self, value):
1138 self._dotsize = int(value)
1139 1140 @property
1141 - def ovalsize(self):
1142 """ 1143 size of the circle around selected sequences 1144 1145 raises ValueError if value not castable to int 1146 1147 @rtype: int 1148 """ 1149 return self._ovalsize
1150 1151 @ovalsize.setter
1152 - def ovalsize(self, value):
1153 self._ovalsize = int(value)
1154 1155 @property
1156 - def groupsize(self):
1157 """ 1158 default for the size of circles that mark newly created groups 1159 1160 raises ValueError if C{value} is not castable to int 1161 1162 @rtype: int 1163 """ 1164 return self._groupsize
1165 1166 @groupsize.setter
1167 - def groupsize(self, value):
1168 self._groupsize = int(value)
1169 1170 @property
1171 - def usefoldchange(self):
1172 """ 1173 parameter with unclear function. Check in Clans. 1174 1175 raises ValueError if set to non-boolean value 1176 1177 @rtype: bool 1178 """ 1179 return self._usefoldchange
1180 1181 @usefoldchange.setter
1182 - def usefoldchange(self, value):
1183 if not isinstance(value, bool): 1184 raise ValueError(('usefoldchange cannot be {0} (accepted values: ' 1185 + 'True/False)').format(value)) 1186 1187 self._usefoldchange = value
@property
def avgfoldchange(self):
    """
    CLANS parameter whose function is unclear (check in CLANS itself).

    Only genuine C{bool} objects are accepted on assignment; anything
    else raises ValueError.

    @rtype: bool
    """
    return self._avgfoldchange

@avgfoldchange.setter
def avgfoldchange(self, value):
    if isinstance(value, bool):
        self._avgfoldchange = value
        return

    # strict check: truthy/falsy stand-ins such as 0, 1 or 'true' are refused
    message = 'avgfoldchange cannot be {0} (accepted values: True/False)'
    raise ValueError(message.format(value))
@property
def colors(self):
    """
    Colors that define the coloring for different p-values/attractions.

    The value has to be a tuple of exactly 10 items, each of which has
    length 2; any other value raises ValueError on assignment.

    @rtype: tuple
    """
    return self._colors

@colors.setter
def colors(self, value):
    if not isinstance(value, tuple):
        raise ValueError('colors must be a tuple')

    if len(value) != 10:
        raise ValueError('colors must be a 10-tuple')

    # measure every item first, then require that all of them are pairs
    item_sizes = [len(item) for item in value]
    if item_sizes.count(2) != len(item_sizes):
        raise ValueError('each item of colors must be a 2-tuple')

    self._colors = value
1229
def set_default_params(self):
    """
    Resets every parameter to its CLANS default value.
    See L{ClansParams}._DEFAULTS.

    All defaults except C{colors} are assigned as they are stored; the
    C{colors} default is converted into (cutoff, L{Color}) pairs before
    it is assigned.
    """
    for param_name, default in self._DEFAULTS.items():
        if param_name != 'colors':
            setattr(self, param_name, default)

    # each stored default color is unpacked into the Color constructor
    self.colors = tuple(
        (cutoff, Color(*rgb))
        for cutoff, rgb in ClansParams._DEFAULTS['colors'])
1245
def _to_clans_param_block(self):
    """
    Renders the L{ClansParams} values as the parameter block of a CLANS
    file.

    One C{name=value} line is written per parameter, ordered by name.
    C{colors} is emitted as the two parameters C{colorcutoffs} and
    C{colorarr}; boolean parameters are written as C{true}/C{false}.

    @return: a CLANS file format <param>[data]</param> block
    @rtype: str
    """
    boolean_params = ('avgfoldchange', 'cluster2d', 'complexatt',
                      'showinfo', 'usefoldchange', 'usescval')
    # indexed with the bool itself: False -> 'false', True -> 'true'
    bool_words = ('false', 'true')

    rendered = {}
    for key in sorted(ClansParams._DEFAULTS):
        if key == 'colors':
            # 'colors' has no line of its own; it is split into the
            # cutoff list and the color list
            rendered['colorcutoffs'] = ''.join(
                '{0:.2f};'.format(cutoff) for cutoff, _ in self.colors)
            rendered['colorarr'] = ''.join(
                '({0}):'.format(color.to_clans_color())
                for _, color in self.colors)
        elif key in boolean_params:
            rendered[key] = bool_words[getattr(self, key)]
        else:
            rendered[key] = getattr(self, key)

    lines = ['{0}={1}'.format(key, rendered[key]) for key in sorted(rendered)]

    return '<param>\n' + '\n'.join(lines) + '\n</param>\n'
1285
class ClansEntryCollection(csb.core.ReadOnlyCollectionContainer):
    """
    Read-only container that holds (and internally manages) the
    L{ClansEntry} instances of a clustermap.
    """

    def __init__(self):

        super(ClansEntryCollection, self).__init__(type=ClansEntry)

    def _remove_item(self, item):
        """
        Removes {item} from the collection.

        @param item: the item to be removed
        @type item: a L{ClansEntry} instance

        @raises TypeError: if {item} is not a L{ClansEntry} instance
        """
        required_type = self._type

        # the type check is skipped when the container has no type restriction
        if required_type and not isinstance(item, required_type):
            raise TypeError("Item {0} is not of the required {1} type.".format(
                item, required_type.__name__))

        self._items.remove(item)

    def _sort(self, key=None):
        """
        Sorts the entries in place, by their {name} unless a custom key
        function is given.

        @Note: If the L{ClansEntryCollection} is part of a L{Clans} instance,
        use L{Clans.sort} instead to avoid silently corrupting the index in
        L{Clans._idx}.

        @param key: None to sort by name, a custom key function else
        @type key: function
        """
        sort_key = operator.attrgetter('name') if key is None else key

        self._items.sort(key=sort_key)
1328
1329 1330 -class ClansSeqgroupCollection(csb.core.ReadOnlyCollectionContainer):
1331 """ 1332 Read-only container for holding (and internally managing) L{ClansSeqgroup} 1333 instances. 1334 """ 1335
1336 - def __init__(self):
1339
1340 - def _remove_item(self, item):
1341 """ 1342 Removes {item} from the collection. 1343 1344 @param item: the item to be removed 1345 @type item: a L{ClansSeqgroup} instance 1346 1347 @raises TypeError: if {item} is not a L{ClansSeqgroup} instance 1348 """ 1349 1350 if self._type: 1351 if not isinstance(item, self._type): 1352 raise TypeError("Item {0} is not of the required {1} type.".format( 1353 item, self._type.__name__)) 1354 self._items.remove(item)
1355
class Clans(object):
    """
    Class for holding and manipulating data from one CLANS file.
    Initialization is always done as empty clustermap with default parameters.
    """

    def __init__(self):
        self._filename = None

        self._params = ClansParams()

        self._rotmtx = None
        self.set_default_rotmtx()

        self._hsp_att_mode = "hsp"

        self._entries = ClansEntryCollection()
        self._seqgroups = ClansSeqgroupCollection()

        self._idx = None
        '''Index dict for fast access to entry positions'''
        # The index is rebuilt lazily: every operation that changes the entry
        # list only clears this flag, L{_update_index} sets it again.
        self._has_good_index = False

    def __repr__(self):
        return 'Clans object: {0} sequences; {1} seqgroups'.format(
            len(self), len(self.seqgroups))

    __str__ = __repr__

    def __len__(self):
        return len(self.entries)

    def __getitem__(self, index):
        return self.entries[index]

    def __setitem__(self, index, data):
        self.entries[index] = data
        self._has_good_index = False

    @property
    def filename(self):
        """
        file from which the data was parsed

        @rtype: str or None
        """
        return self._filename

    @property
    def params(self):
        """
        L{ClansParams} that contains the parameters set for this L{Clans}
        instance.

        @rtype: L{ClansParams}
        """
        return self._params

    @property
    def rotmtx(self):
        """
        3x3 rotation matrix that indicates the rotation state of the clustermap

        raises ValueError if rotation matrix shape is not 3x3

        @rtype: numpy.array
        """
        return self._rotmtx

    @rotmtx.setter
    def rotmtx(self, value):
        if value.shape != (3, 3):
            raise ValueError('rotation matrix needs to be a 3x3 numpy array')
        self._rotmtx = value

    @property
    def entries(self):
        """
        the L{ClansEntry}s of the clustermap.

        @rtype: L{ClansEntryCollection}
        """
        return self._entries

    @property
    def seqgroups(self):
        """
        the L{ClansSeqgroup}s defined in the clustermap.

        @rtype: L{ClansSeqgroupCollection}
        """
        return self._seqgroups

    def set_default_rotmtx(self):
        """
        Resets the rotation matrix (rotmtx) to no rotation.
        """
        self.rotmtx = eye(3)

    def _update_index(self):
        """
        Creates an index of L{ClansEntry}s to their position in the L{Clans}
        instance.

        The index is used to allow for fast access via L{ClansEntry.get_id} and
        was introduced to get a better L{Clans}.write() performance, which
        suffered from excessive entry.get_id() calls during HSP block generation
        (see L{ClansFileWriter.add_hsp_block}).

        @raises DuplicateEntryError: if two entries have the same name, sequence, and coordinates
        """
        unique_ids = [e._get_unique_id() for e in self]

        if len(unique_ids) != len(set(unique_ids)):
            # Collect the ids that occur more than once in a single pass, then
            # report the first entry (in clustermap order) that has a twin.
            seen = set()
            duplicated = set()
            for unique_id in unique_ids:
                if unique_id in seen:
                    duplicated.add(unique_id)
                else:
                    seen.add(unique_id)

            for i, unique_id in enumerate(unique_ids):
                if unique_id in duplicated:
                    raise DuplicateEntryError(
                        '{0} is contained multiple times in this Clans instance'.format(
                            repr(self.entries[i])))

        self._idx = dict(zip(unique_ids, range(len(self))))
        self._has_good_index = True

    def initialize(self):
        """Initializes the coordinates of all entries with random numbers in [-1, 1]."""
        for entry in self:
            entry.initialize_coordinates()

    def sort(self, key=None):
        """
        Sorts the L{ClansEntry}s by their name or by a custom key function.

        @param key: a custom key function
        @type key: function
        """
        self._entries._sort(key)

        self._has_good_index = False

    def add_group(self, group, members=None):
        """
        Adds a new group.

        The type of C{group} is checked by the seqgroup collection's
        C{_append_item}, which is not defined in this class.

        @param group: the new group
        @type group: L{ClansSeqgroup} instance

        @param members: L{ClansEntry} instances to be in the new group
        @type members: list
        """
        self.seqgroups._append_item(group)

        if members is not None:
            for member in members:
                group.add(member)

    def remove_group(self, group):
        """
        Removes a group and detaches all of its members from it.

        @param group: the group that shall be removed
        @type group: L{ClansSeqgroup} instance
        """
        self.seqgroups._remove_item(group)

        # iterate over a copy: group.remove() shrinks group.members
        for member in group.members[:]:
            group.remove(member)

    def append_groups_from(self, other):
        '''
        Append the L{ClansSeqgroup}-s of C{other} that contain at least one entry
        that corresponds to an entry in this instance. Entries are compared by
        their name only! Groups without any matching members in this instance are
        not created in the local instance.

        @param other: the source of the new group definitions
        @type other: L{Clans} instance

        @raise DuplicateEntryNameError: if a member name of C{other} matches
                                        several entries of this instance
        '''

        for group in other.seqgroups:
            new_group = ClansSeqgroup(name=group.name,
                                      type=group.type,
                                      size=group.size,
                                      hide=group.hide,
                                      color=group.color)

            for member in group.members:
                try:
                    new_member = self.get_entry(member.name, pedantic=True)

                except ValueError:  # no entry with this name found
                    continue

                except DuplicateEntryNameError:
                    raise DuplicateEntryNameError('multiple entries with identical name: {0}'.format(member.name))

                new_group.add(new_member)

            if len(new_group.members) > 0:
                self.add_group(new_group)

    def add_entry(self, entry):
        """
        Adds an new entry.

        @param entry: the new entry
        @type entry: L{ClansEntry} instance

        @raise ValueError: if C{entry} is no L{ClansEntry} instance
        """
        if not isinstance(entry, ClansEntry):
            raise ValueError('entries need to be L{ClansEntry} instances')

        self.entries._append_item(entry)
        entry._parent = self

        self._has_good_index = False

    def remove_entry_by_name(self, entry_name):
        """
        Removes an entry fetched by its name.

        @param entry_name: name of the entry that shall be removed
        @type entry_name: string

        @raise ValueError: if no entry with that name exists
        @raise DuplicateEntryNameError: if several entries have that name
        """
        entry = self.get_entry(entry_name, True)

        self.remove_entry(entry)

    def remove_entry(self, entry):
        """
        Removes an entry, together with all HSPs that involve it and its
        memberships in seqgroups. Seqgroups that are empty afterwards are
        removed as well.

        @param entry: the entry that shall be removed
        @type entry: L{ClansEntry} instance
        """
        # Snapshot the partners first: remove_hsp() also deletes the
        # connection from entry.hsp, and a dict must not change size while
        # it is being iterated.
        for other_entry in list(entry.hsp):
            other_entry.remove_hsp(entry)

        # same reason: g.remove() modifies entry.groups
        for g in list(entry.groups):
            g.remove(entry)

        remove_groups = [g for g in self.seqgroups if g.is_empty()]
        for g in remove_groups:
            self.seqgroups._remove_item(g)

        self.entries._remove_item(entry)
        self._has_good_index = False

    def get_entry(self, name, pedantic=True):
        """
        Checks if an entry with name C{name} exists and returns it.

        @param name: name of the sought entry
        @type name: str

        @param pedantic: If True, a L{DuplicateEntryNameError} is raised if
                         multiple entries with name C{name} are found. If
                         False, returns the first one.
        @type pedantic: bool

        @raise ValueError: if no entry with name C{name} is found
        @raise DuplicateEntryNameError: if multiple entries with name C{name}
        are found and C{pedantic == True}

        @rtype: L{ClansEntry}
        @return: entry with name C{name}
        """

        hits = [e for e in self.entries if e.name == name]

        if not hits:
            raise ValueError('ClansEntry {0} does not exist.'.format(name))

        if len(hits) > 1 and pedantic:
            raise DuplicateEntryNameError(
                'multiple entries have name \'{0}\''.format(name))

        return hits[0]

    def restrict_to_max_pvalue(self, cutoff):
        """
        Removes all L{ClansEntry}s that have no HSP with a value below
        C{cutoff}. This is repeated until no entry is removed anymore, because
        removing an entry also removes its HSPs from the remaining entries.

        @param cutoff: the cutoff
        @type cutoff: float

        @rtype: list
        @return: all removed L{ClansEntry}s
        """
        removed_entries = []

        ## loop to hit entries that have no HSPs left after the previous round
        while True:
            remove_us = [entry for entry in self.entries
                         if len(entry.hsp) == 0
                         or min(entry.hsp.values()) >= cutoff]

            if not remove_us:
                break

            removed_entries.extend(remove_us)

            for entry in remove_us:
                if entry in self:
                    self.remove_entry(entry)

        return removed_entries

    def restrict(self, keep_names):
        """
        Removes all entries whose name is not in keep_names

        @param keep_names: names of entries that shall be kept (hashable)
        @type keep_names: iterable
        """
        # Materialize once: a one-shot iterator would be consumed by the
        # first membership test, and a set gives constant-time lookups.
        keep = set(keep_names)

        for entry in [e for e in self.entries if e.name not in keep]:
            self.remove_entry(entry)

    def write(self, filename):
        """
        writes the L{Clans} instance to a file in CLANS format

        @param filename: the target file\'s name
        @type filename: str
        """

        with open(filename, 'w') as stream:
            writer = ClansFileWriter(stream)
            writer.serialize(self)
1684
class ClansEntry(object):
    """
    One sequence entry of a CLANS clustermap.

    @param name: the entry name
    @type name: str

    @param seq: the entry\'s amino acid sequence
    @type seq: str

    @param coords: coordinates in 3D space; random coordinates are assigned
                   if omitted
    @type coords: iterable with 3 items

    @param parent: parent of this entry
    @type parent: L{Clans} instance
    """

    def __init__(self, name=None, seq='', coords=None, parent=None):
        self._groups = []
        self._hsp = {}
        self._parent = parent

        self._name = name
        self._seq = seq

        if coords is not None:
            self._coords = coords
        else:
            self.initialize_coordinates()

    def __repr__(self):
        position = self.coords
        coords_string = ('NoCoordsSet' if position is None else
                         '({0:.2f}, {1:.2f}, {2:.2f})'.format(*position))

        if len(self.groups) > 0:
            group_names = ', '.join([g.name for g in self.groups])
            groups = 'groups: {0}'.format(group_names)
        else:
            groups = 'not in a group'

        return 'ClansEntry "{0}": {1}; {2} '.format(
            self.name, coords_string, groups)

    @property
    def name(self):
        """
        Name of the entry.

        Assigning anything but a string raises ValueError.

        @rtype: string
        """
        return self._name

    @name.setter
    def name(self, value):
        if isinstance(value, csb.core.string):
            self._name = value
            return

        message = 'name cannot be {0} (accepted values: strings)'
        raise ValueError(message.format(value))

    @property
    def seq(self):
        """
        Protein sequence of the entry.

        Assigning anything but a string raises ValueError.

        @rtype: string
        """
        return self._seq

    @seq.setter
    def seq(self, value):
        if isinstance(value, csb.core.string):
            self._seq = value
            return

        message = 'seq cannot be {0} (accepted values: strings)'
        raise ValueError(message.format(value))

    @property
    def coords(self):
        """
        Coordinates of the entry in 3D space.

        Assigning a value whose length is not 3 raises ValueError.

        @rtype: the object that was assigned (a numpy array after
                L{initialize_coordinates})
        """
        return self._coords

    @coords.setter
    def coords(self, value):
        if len(value) == 3:
            self._coords = value
            return

        message = ('coords cannot be {0} (accepted values: '
                   'iteratables with 3 items)')
        raise ValueError(message.format(value))

    @property
    def parent(self):
        """
        L{Clans} instance this L{ClansEntry} belongs to.

        @rtype: L{Clans}
        """
        return self._parent

    @property
    def groups(self):
        """
        L{ClansSeqgroup}s this entry is a member of.

        @rtype: list
        """
        return self._groups

    @property
    def hsp(self):
        """
        Connections of this entry: maps each connected L{ClansEntry} to the
        value of the HSP.

        @rtype: dict
        """
        return self._hsp

    def get_id(self):
        """
        Returns the id of the current entry, i.e. its position in the index
        of the parent L{Clans} instance.

        Note: the first call to this method triggers L{Clans._update_index},
        which will make it appear slower than successive calls.

        @rtype: int
        @return: the entrys\' id is returned unless it has no parent in which
        case -1 is returned
        """
        owner = self.parent

        if owner is None:
            return -1

        # rebuild the parent's lookup table if it is out of date
        if not owner._has_good_index:
            owner._update_index()

        return owner._idx[self._get_unique_id()]

    def _get_unique_id(self):
        """
        Builds the key under which this entry is stored in the index of its
        parent: name, sequence, and coordinates joined by C{<###>}.

        The key is only >>more or less<< unique. If two entries share it,
        L{Clans._update_index} will raise a DuplicateEntryError.

        @rtype: str
        @return: a more or less unique id
        """
        return '{0}<###>{1}<###>{2}'.format(self.name, self.seq, self.coords)

    def initialize_coordinates(self):
        """Sets the coordinates to random numbers in [-1, 1]"""
        self.coords = 2 * random.random(3) - 1

    def add_hsp(self, other, value):
        """
        Connects C{self} and C{other} by an HSP with the given value; the
        connection is stored in both entries.

        @param other: the other entry
        @type other: L{ClansEntry} instance

        @param value: the value of the HSP
        @type value: float
        """
        for source, target in ((self, other), (other, self)):
            source.hsp[target] = value

    def remove_hsp(self, other):
        """
        Removes the HSP between C{self} and C{other} from both entries; if
        none exists, does nothing.

        @param other: the other entry
        @type other: L{ClansEntry} instance
        """
        self.hsp.pop(other, None)
        other.hsp.pop(self, None)

    def output_string_seq(self):
        """
        Creates the CLANS <seq> block format representation of the entry.

        @rtype: str
        @return: entrys\' representation in CLANS <seq> block format
        """
        header = '>{0}\n'.format(self.name)
        return header + '{0}\n'.format(self.seq)

    def output_string_pos(self):
        """
        Create the CLANS <pos> block format representation of the entry.

        @rtype: str
        @return: entrys\' representation in CLANS <pos> block format
        """
        return '{0} {1:.8f} {2:.8f} {3:.8f}'.format(
            self.get_id(), *self.coords)

    def output_string_hsp(self):
        """
        Creates the CLANS <hsp> block format representation of the entry:
        one line per connection of this entry.

        @rtype: str
        @return: entrys\' representation in CLANS <hsp> block format
        """
        hsp_lines = []
        for partner, value in self.hsp.items():
            hsp_lines.append('{0} {1}:{2:.8f}'.format(
                self.get_id(), partner.get_id(), value))

        return '\n'.join(hsp_lines)
1910
class ClansSeqgroup(object):
    """
    One CLANS group (seqgroup) and its display settings.

    @kwparam name: name of the seqgroup; default: 'NO NAME'
    @type name: string

    @kwparam type: symbol used to represent the seqgroup in the graphical
                   output; default: 0
    @type type: int

    @kwparam size: size of the symbol used to represent the seqgroup in the
                   graphical output; default: 4
    @type size: int

    @kwparam hide: if True, the seqgroup\'s symbols in the graphical output are
                   not drawn; default: False
    @type hide: bool

    @kwparam color: color of the seqgroup; default: (255, 255, 255)
    @type color: L{Color}, string formatted like \'x;y;z\', or 3-item iterable

    @kwparam members: list of members of this seqgroup
    @type members: list
    """

    def __init__(self, **kw):
        # create all private fields first, then fill them through the
        # validating properties
        self._name = None
        self._type = None
        self._size = None
        self._hide = None
        self._color = None
        self._members = []

        self.name = kw.pop('name', 'NO NAME')
        self.type = kw.pop('type', 0)
        self.size = kw.pop('size', 4)
        self.hide = kw.pop('hide', False)
        self.color = kw.pop('color', (255, 255, 255))

        if 'members' in kw:
            initial_members = kw['members']
            for entry in initial_members:
                self.add(entry)

    def __repr__(self):
        template = ('ClansSeqgroup {0.name}: type: {0.type}; size: {0.size}; '
                    'hide: {0.hide}; color: {1}; #members: {2}')
        return template.format(
            self, self.color.to_clans_color(), len(self.members))

    def __len__(self):
        return len(self.members)

    @property
    def name(self):
        """
        Name of the seqgroup.

        Assigning anything but a string raises ValueError.

        @rtype: string
        """
        return self._name

    @name.setter
    def name(self, value):
        if isinstance(value, csb.core.string):
            self._name = value
        else:
            raise ValueError('name must be a string')

    @property
    def type(self):
        """
        Symbol used to represent the seqgroup in the graphical output.

        Assigned values are passed through C{int()}.

        @rtype: int
        """
        return self._type

    @type.setter
    def type(self, value):
        symbol = int(value)
        self._type = symbol

    @property
    def size(self):
        """
        Size of the symbol used to represent the seqgroup in the graphical
        output.

        Assigned values are passed through C{int()}.

        @rtype: int
        """
        return self._size

    @size.setter
    def size(self, value):
        symbol_size = int(value)
        self._size = symbol_size

    @property
    def hide(self):
        """
        If True, the seqgroup\'s symbols in the graphical output are not drawn.

        Assigning anything but a bool raises ValueError.

        @rtype: bool
        """
        return self._hide

    @hide.setter
    def hide(self, value):
        if isinstance(value, bool):
            self._hide = value
            return

        message = 'hide cannot be {0} (accepted values: True/False)'
        raise ValueError(message.format(value))

    @property
    def color(self):
        """
        Color of the seqgroup.

        Accepts a L{Color} instance, a string that L{Color.from_string} can
        parse (format: \'{r};{g};{b}\'), or an iterable of 3 items that are
        castable to int. Any other value with a length raises ValueError.

        @rtype: L{Color}
        """
        return self._color

    @color.setter
    def color(self, value, separator=';'):
        if isinstance(value, Color):
            # an existing Color instance is stored as it is
            parsed = value
        elif isinstance(value, csb.core.string):
            ## parse color from string in format 'r;g;b'
            parsed = Color.from_string(value)
        elif len(value) == 3:
            # 3-item iterables like (3, 5, 6)
            parsed = Color(*[int(channel) for channel in value])
        else:
            raise ValueError('cannot parse color from \'{0}\''.format(value))

        self._color = parsed

    @property
    def members(self):
        """
        The L{ClansEntry}s that belong to this seqgroup.

        @rtype: list
        """
        return self._members

    def is_empty(self):
        """
        Checks if the group contains entries.

        @rtype: bool
        @return: True if the group contains no entries, else False.
        """
        return not len(self)

    def add(self, new_member):
        """
        Adds entry C{new_member} to this L{ClansSeqgroup} and registers the
        group in the entry\'s list of groups.

        @Note: L{ClansEntry}-s added using this method that are not part of the
        main L{Clans} instance need to be added to the L{Clans} instance manually.

        @param new_member: the member that shall be added to this
        L{ClansSeqgroup}
        @type new_member: L{ClansEntry} instance

        @raise TypeError: if C{new_member} is no L{ClansEntry} instance
        @raise ValueError: if C{new_member} is already contained in this
        L{ClansSeqgroup}
        """
        if not isinstance(new_member, ClansEntry):
            raise TypeError(
                'only ClansEntry instances can be added as group members')

        if new_member in self.members:
            message = 'entry {0.name} is already contained in this seqgroup'
            raise ValueError(message.format(new_member))

        self.members.append(new_member)
        new_member.groups.append(self)

    def remove(self, member):
        """
        Removes L{ClansEntry} C{member} from this group and the group from the
        entry\'s list of groups.

        @param member: the member to be removed
        @type member: a L{ClansEntry} instance

        @raise TypeError: if C{member} is no L{ClansEntry} instance
        @raise ValueError: if C{member} is not part of this L{ClansSeqgroup}
        """
        if not isinstance(member, ClansEntry):
            raise TypeError('argument must be a ClansEntry instance')

        if member not in self.members:
            message = '"{0.name}" is not a member of this seqgroup'
            raise ValueError(message.format(member))

        self.members.remove(member)
        member.groups.remove(self)

    def output_string(self):
        """
        Creates the CLANS <seqgroup> block format representation of the
        group. The members are listed by their ids in ascending order.

        @rtype: str
        @return: entrys\' representation in CLANS <seqgroup> block format
        """
        member_ids = [m.get_id() for m in self.members]
        member_ids.sort()
        numbers = ''.join('{0};'.format(member_id) for member_id in member_ids)

        template = '\n'.join(('name={0.name}', 'type={0.type}',
                              'size={0.size}', 'hide={1}', 'color={2}',
                              'numbers={3}'))

        return template.format(
            self, int(self.hide), self.color.to_clans_color(), numbers)
2143