Package csb :: Package bio :: Package io :: Module clans
[frames | no frames]

Source Code for Module csb.bio.io.clans

   1  """ 
   2  Classes for parsing/manipulating/writing CLANS (by Tancred Frickey) files 
   3   
   4  Author: Klaus Kopec 
   5  MPI fuer Entwicklungsbiologie, Tuebingen 
   6  """ 
   7  import os 
   8  import re 
   9  import operator 
  10  import csb.core 
  11   
  12  from abc import ABCMeta, abstractmethod 
  13  from numpy import array, float64, eye, random 
class MissingBlockError(Exception):
    """
    Raised if an expected tag is not found during parsing of a CLANS file.
    """
21
class UnknownTagError(ValueError):
    """
    Raised if an unknown tag is encountered while parsing a CLANS file.
    """
28
class Color(object):
    """
    RGB color handling class.
    Color is stored as r, g, and b attributes.
    Default color is C{r}=C{g}=C{b}=0 (i.e. black)

    @param r: the red value
    @type r: int

    @param g: the green value
    @type g: int

    @param b: the blue value
    @type b: int

    @raise ValueError: if any value is outside of range(256)
    """

    def __init__(self, r=0, g=0, b=0):
        ## the private slots are created first so that the instance always
        ## has them, even if a validating setter below raises
        self._r = None
        self.r = r
        self._g = None
        self.g = g
        self._b = None
        self.b = b

    def __repr__(self):
        return 'Color {0}'.format(self.to_clans_color())

    __str__ = __repr__

    @staticmethod
    def from_string(color_string, separator=';'):
        """
        Factory for a Color instance created from a string formatted as
        r{separator}g{separator}b

        @param color_string: a string containing colors in format
                             r{separator}g{separator}b
        @type color_string: str

        @param separator: the string that separates the three values
        @type separator: str

        @rtype: L{Color}
        @return: the color described by C{color_string}

        @raise TypeError: if {color_string} is not a string
        @raise ValueError: if C{color_string} does not contain exactly two
                           separators, if a value is no integer or if any
                           value is outside of range(256)
        """
        if not isinstance(color_string, csb.core.string):
            raise TypeError('{0} is no string'.format(color_string))

        if color_string.count(separator) != 2:
            raise ValueError(
                ('format needs to be \'r{0}g{0}b\' but color_string was ' +
                 '{1}').format(separator, color_string))

        ## split on the requested separator (not on a hard-coded ';'),
        ## otherwise every separator other than ';' is rejected
        r, g, b = map(int, color_string.split(separator))
        return Color(r, g, b)

    @property
    def r(self):
        """
        the red value of the RGB color.

        raises ValueError if C{value} is outside of range(256)

        @rtype: int
        """
        return self._r

    @r.setter
    def r(self, value):
        """
        Set the red value of the RGB color.
        """
        if value < 0 or value > 255:
            raise ValueError(
                'valid color values are in range(256), was \'{0}\''.format(
                    value))

        self._r = value

    @property
    def g(self):
        """
        the green value of the RGB color.

        raises ValueError if C{value} is outside of range(256)

        @rtype: int
        """
        return self._g

    @g.setter
    def g(self, value):
        """
        Set the green value of the RGB color.
        """
        if value < 0 or value > 255:
            raise ValueError('valid color values are in range(256).')

        self._g = value

    @property
    def b(self):
        """
        the blue value of the RGB color.

        raises ValueError if C{value} is outside of range(256)

        @rtype: int
        """
        return self._b

    @b.setter
    def b(self, value):
        """
        Set the blue value of the RGB color.
        """
        if value < 0 or value > 255:
            raise ValueError('valid color values are in range(256).')

        self._b = value

    def to_clans_color(self):
        """
        Formats the color for use in CLANS files.

        @return: the color formatted for use in CLANS files; format: r;g;b
        @rtype: str
        """
        return '{0.r};{0.g};{0.b}'.format(self)
150
class ClansParser(object):
    """
    CLANS file format aware parser.
    """

    def __init__(self):
        self._clans_instance = None
        self._data_block_dict = {}

    def __repr__(self):
        return 'ClansParser instance'

    __str__ = __repr__

    @property
    def clans_instance(self):
        """
        the L{Clans} instance that resulted from parsing a CLANS file.

        raises a ValueError if no CLANS file has been parsed yet

        @rtype: L{Clans} instance
        """
        if self._clans_instance is None:
            raise ValueError('you need to parse a CLANS file first')

        return self._clans_instance

    def parse_file(self, filename, permissive=True):
        """
        Create a L{Clans} instance by parsing the CLANS format file C{filename}

        @param filename: name of the CLANS file.
        @type filename: str

        @param permissive: if True, tolerate missing param, rotmtx, seq and
                           pos blocks as well as unknown blocks.
        @type permissive: bool

        @rtype: L{Clans} instance
        @return: a L{Clans} instance containing the parsed data

        @raise MissingBlockError: if C{permissive == False} and the param,
                                  rotmtx, seq or pos block is missing
        @raise UnknownTagError: if C{permissive == False} and an unknown tag/
                                data block is encountered
        """
        self._clans_instance = Clans()
        self._clans_instance._filename = filename

        self._read_block_dict()  # read and preprocess the CLANS file

        ## param and rotmtx are non-essential blocks. They are handled
        ## independently, so that a missing <param> block does not make the
        ## parser skip an existing <rotmtx> block.
        self._parse_optional(self._parse_param, permissive)
        self._parse_optional(self._parse_rotmtx, permissive)

        seq = self._parse_optional(self._parse_seq, permissive, {})
        seqgroups = self._parse_seqgroups()
        pos = self._parse_optional(self._parse_pos, permissive, {})

        ## the connection data is stored in exactly one of three blocks;
        ## each parser is only called if its block exists
        hsp_att_mode = "hsp"
        hsp = {}
        if 'hsp' in self._data_block_dict:
            hsp = self._parse_hsp_att('hsp')
        elif 'att' in self._data_block_dict:
            hsp_att_mode = "att"
            hsp = self._parse_hsp_att('att')
        elif 'mtx' in self._data_block_dict:
            hsp = self._parse_mtx()

        ## raise UnknownTagError for unknown blocks
        known_block_tags = set(('param', 'rotmtx', 'seq', 'seqgroups', 'pos',
                                'hsp', 'mtx', 'att'))
        unprocessed_block_tags = set(self._data_block_dict).difference(
            known_block_tags)

        if len(unprocessed_block_tags) > 0 and not permissive:
            raise UnknownTagError(
                ('tags unknown: {0}. File corrupt or further implementations '
                 + 'needed!').format(', '.join(sorted(unprocessed_block_tags))))

        ## if no entries exist, we cannot add pos, seqgroup and hsp data
        if len(seq) > 0:
            clans = self._clans_instance

            ## add Entries; groups and hsps address entries by their index,
            ## so the entries are created in ascending order of their number
            if len(pos) > 0:
                clans._entries = [
                    ClansEntry(seq[i][0], seq[i][1], pos[i], parent=clans)
                    for i in sorted(pos)]

            ## add groups
            clans._seqgroups = ClansSeqgroupCollection()
            if len(seqgroups) > 0:
                for group_raw_data in seqgroups:

                    ## the raw values are strings, hence hide is compared
                    ## to '1' (comparing to the integer 1 is always False)
                    group = ClansSeqgroup(
                        name=group_raw_data['name'],
                        type=group_raw_data['type'],
                        size=group_raw_data['size'],
                        hide=group_raw_data['hide'] == '1',
                        color=group_raw_data['color'])

                    ## get members corresponding to the IDs in this group
                    ## NOTE: this silently corrects files where a seqgroup
                    ## contains the same entry multiple times
                    members = [clans.entries[number] for number
                               in sorted(set(group_raw_data['numbers']))]

                    clans.add_group(group, members)

            ## add hsp values
            for (a, b), value in hsp.items():
                clans.entries[a].add_hsp(clans.entries[b], value)

        self._clans_instance._hsp_att_mode = hsp_att_mode

        return self._clans_instance

    def _parse_optional(self, parse, permissive, default=None):
        """
        Run the block parser C{parse} and tolerate a missing block in
        permissive mode.

        @param parse: block parser without parameters
        @type parse: callable

        @param permissive: if True, a L{MissingBlockError} is swallowed
        @type permissive: bool

        @param default: value to return if the block is missing

        @return: the result of C{parse()} or C{default}

        @raise MissingBlockError: if C{permissive == False} and C{parse}
                                  raised it
        """
        try:
            return parse()
        except MissingBlockError:
            if not permissive:
                raise
            return default

    def _read_block_dict(self):
        """
        Extracts all <tag>DATA</tag> blocks from file
        self.clans_instance.filename and stores them in
        self._data_block_dict.

        @rtype: dict
        @return: data in the form: dict[tag] = list of the lines of DATA.
        """
        path = os.path.expanduser(self._clans_instance.filename)

        ## the context manager closes the file even if reading fails
        with open(path) as clans_file:
            content = clans_file.read()

        ## remove the first line, i.e. sequences=SEQUENCE_COUNT; partition
        ## yields an empty rest (no blocks) if there is only one line
        data_blocks = content.partition('\n')[2]

        ## flag re.DOTALL is necessary to make . match newlines
        data = re.findall(r'<(\w+)>(.+)</\1>', data_blocks, flags=re.DOTALL)
        self._data_block_dict = dict((tag, datum.strip().split('\n'))
                                     for tag, datum in data)

        return self._data_block_dict

    def _parse_param(self):
        """
        Parse a list of lines in the CLANS <param> format:

        parameter1=data1\\n
        parameter2=data2\\n
        ...

        The result is stored as L{ClansParams} in the clans_instance.

        @raise MissingBlockError: if the file contains no <param> block
        """
        if 'param' not in self._data_block_dict:
            raise MissingBlockError('file contains no <param> block.')

        block = self._data_block_dict['param']

        ## split at the first '=' only, because values may contain '='
        tmp_params = dict(line.split('=', 1) for line in block)

        ## create colors entry from colorcutoffs and colorarr
        colorcutoffs = [float(val) for val in
                        tmp_params.pop('colorcutoffs').strip(';').split(';')]
        colors = tmp_params.pop('colorarr').strip(':')
        colors = colors.replace('(', '').replace(')', '').split(':')
        colorarr = [Color(*map(int, color.split(';'))) for color in colors]

        tmp_params['colors'] = dict(zip(colorcutoffs, colorarr))

        ## convert 'true' and 'false' into Python bools
        for k, v in list(tmp_params.items()):
            if v == 'true':
                tmp_params[k] = True
            elif v == 'false':
                tmp_params[k] = False

        self._clans_instance._params = ClansParams(strict=False, **tmp_params)

    def _parse_rotmtx(self):
        """
        Parse a list of lines in the CLANS <rotmtx> format. The data is stored
        in the clans_instance as a 3x3 numpy.array.

        @raise MissingBlockError: if the file contains no <rotmtx> block
        @raise ValueError: if the rotmtx block does not contain exactly 3 lines
        """
        if 'rotmtx' not in self._data_block_dict:
            raise MissingBlockError('file contains no <rotmtx> block.')

        block = self._data_block_dict['rotmtx']

        if len(block) != 3:
            raise ValueError('CLANS <rotmtx> blocks comprise exactly 3 lines.')

        ## only the first three ';'-separated values of a line are used
        self._clans_instance.rotmtx = array(
            [[float64(val) for val in line.split(';')[:3]] for line in block])

    def _parse_seq(self):
        """
        Parse a list of lines in the CLANS <seq> format, which are in FASTA
        format.

        @rtype: dict
        @return: dict with running numbers as key and 2-tuples (id, sequence)
                 as values

        @raise MissingBlockError: if the file contains no <seq> block
        """
        if 'seq' not in self._data_block_dict:
            raise MissingBlockError(
                'file contains no <seq> block. This is OK if the file does '
                + 'not contain any sequences.')

        block = list(self._data_block_dict['seq'])

        ## a header line without a sequence line gets an empty sequence
        if len(block) % 2 == 1:
            block.append('')

        ## [1:] removes the first character of the header line (the FASTA
        ## '>' marker)
        return dict((i, (block[2 * i][1:], block[2 * i + 1].strip()))
                    for i in range(len(block) // 2))

    def _parse_seqgroups(self):
        """
        Parse a list of lines in the CLANS <seqgroup> format:

        name=name of the group\\n
        type=0\\n
        size=12\\n
        hide=0\\n
        color=255;204;51\\n
        numbers=0;1;2;3;4;5;6;10;13\\n
        ...

        @rtype: list
        @return: list of dicts (one for each group) with the tags (name, type,
                 size, hide, ...) as keys. The value of 'numbers' is a list
                 of integers, all other values are the unmodified strings.
                 If the file contains no <seqgroups> block, an empty
                 L{ClansSeqgroupCollection} is returned instead.
        """
        if 'seqgroups' not in self._data_block_dict:
            return ClansSeqgroupCollection()

        block = self._data_block_dict['seqgroups']

        groups = []
        for line in block:
            if not line.strip():
                continue  # tolerate blank lines between groups

            ## split at the first '=' only: group names may contain '='
            p, v = line.split('=', 1)
            if p == 'name':
                groups.append({'name': v})
            elif p == 'numbers':
                ## skip empty fields (e.g. after a trailing ';') instead of
                ## blindly dropping the last field, which may be a number
                groups[-1][p] = [int(val) for val in v.split(';')
                                 if val.strip()]
            else:
                groups[-1][p] = v
        return groups

    def _parse_pos(self):
        """
        Parse a list of lines in the CLANS <pos> format \'INT FLOAT FLOAT
        FLOAT\'.

        @rtype: dict
        @return: a dict using the integers as keys and a numpy.array created
                 from the floats as values.

        @raise MissingBlockError: if the file contains no <pos> block
        """
        if 'pos' not in self._data_block_dict:
            raise MissingBlockError(
                'file contains no <pos> block. This is OK if the file does '
                + 'not contain any sequences.')

        positions = {}
        for line in self._data_block_dict['pos']:
            fields = line.split()
            positions[int(fields[0])] = array(
                [float64(val) for val in fields[1:]])
        return positions

    def _parse_hsp_att(self, mode):
        """
        Parse a list of lines in the CLANS <hsp> format \'INT INT: FLOAT\' or
        in the <att> format \'INT INT FLOAT\'.

        NOTE: some CLANS <hsp> lines contain more than one float; we omit the
        additional numbers

        @param mode: either "hsp" or "att" depending on the type of tag to be
                     parsed
        @type mode: str

        @rtype: dict
        @return: a dict using 2-tuples of the two integers as keys and the
                 float as values

        @raise ValueError: if C{mode} is neither "hsp" nor "att"
        @raise MissingBlockError: if the file contains no block for C{mode}
        """
        if mode not in ("hsp", "att"):
            raise ValueError('mode must be either "hsp" or "att"')

        if mode not in self._data_block_dict:
            raise MissingBlockError(
                ('file contains no <{0}> block. This is OK if the file does '
                 + 'not contain any sequences or if none of the contained '
                 + 'sequences have any connections.').format(mode))

        connections = {}
        for line in self._data_block_dict[mode]:
            if mode == "hsp":
                ids, _colon, values = line.partition(':')
                pair = tuple(int(val) for val in ids.split())
                ## whitespace splitting accepts both 'a b:v' and 'a b: v'
                value = float(values.split()[0])
            else:
                fields = line.split()
                pair = tuple(int(val) for val in fields[:2])
                value = float(fields[2])

            connections[pair] = value

        return connections

    def _parse_mtx(self):
        """
        Parse a list of lines in the CLANS <mtx> format.

        @rtype: dict
        @return: a dict using 2-tuples of (line index, column index) as keys
                 and the float found there as values; zeros are omitted

        @raise MissingBlockError: if the file contains no <mtx> block
        """
        if 'mtx' not in self._data_block_dict:
            raise MissingBlockError(
                'file contains no <mtx> block. This is OK if the file does '
                + 'not contain any sequences or if none of the contained '
                + 'sequences have any connections.')

        connections = {}
        for i, line in enumerate(self._data_block_dict['mtx']):
            ## [:-1] drops the empty field after the trailing ';'
            for j, entry in enumerate(line.split(';')[:-1]):
                value = float(entry)
                if value != 0:
                    connections[(i, j)] = value
        return connections
494
class ClansFileBuilder(object):
    """
    Abstract base class for building a file in CLANS format.
    Defines a common step-wise interface according to the Builder pattern.

    @param output: output stream (this is where the product is constructed)
    @type output: stream

    @raise TypeError: if C{output} has no C{write} attribute
    """

    __metaclass__ = ABCMeta

    def __init__(self, output):

        if hasattr(output, 'write'):
            self._out = output
        else:
            raise TypeError(output)

    @property
    def output(self):
        """
        Destination stream
        @rtype: stream
        """
        return self._out

    def write(self, text):
        """
        Write a chunk of text
        """
        self._out.write(text)

    def writeline(self, text):
        """
        Write a chunk of text and append a new line terminator
        """
        for chunk in (text, '\n'):
            self._out.write(chunk)

    @abstractmethod
    def add_param_block(self, block_data):
        """
        Build step for the <param> block; implemented by subclasses.
        """

    @abstractmethod
    def add_rotmtx_block(self, block_data):
        """
        Build step for the <rotmtx> block; implemented by subclasses.
        """

    @abstractmethod
    def add_seq_block(self, block_data):
        """
        Build step for the <seq> block; implemented by subclasses.
        """

    @abstractmethod
    def add_seqgroups_block(self, block_data):
        """
        Build step for the <seqgroups> block; implemented by subclasses.
        """

    @abstractmethod
    def add_pos_block(self, block_data):
        """
        Build step for the <pos> block; implemented by subclasses.
        """

    @abstractmethod
    def add_hsp_block(self, block_data):
        """
        Build step for the <hsp> block; implemented by subclasses.
        """
557
class ClansFileWriter(ClansFileBuilder):
    """
    Class for serializing a L{Clans} instance to a file in CLANS format.

    @param output: the output stream
    @type output: stream
    """

    def __init__(self, output):
        super(ClansFileWriter, self).__init__(output)

    def serialize(self, clans_instance):
        """
        Creates a CLANS file containing all data from {clans_instance}

        @param clans_instance: the source of the data to be serialized
        @type clans_instance: a L{Clans} instance
        """
        ## the order of the steps is the order of the blocks in the file
        build_steps = (self.add_sequences_line,
                       self.add_param_block,
                       self.add_rotmtx_block,
                       self.add_seq_block,
                       self.add_seqgroups_block,
                       self.add_pos_block,
                       self.add_hsp_block)

        for build_step in build_steps:
            build_step(clans_instance)

    def add_sequences_line(self, clans_instance):
        """
        Appends the \'sequences=<#sequences>\' line to {output}.

        @param clans_instance: the source of the data to be serialized
        @type clans_instance: a L{Clans} instance
        """
        entry_count = len(clans_instance.entries)
        self.writeline('sequences={0}'.format(entry_count))

    def add_param_block(self, clans_instance):
        """
        Appends a <param>data</param> CLANS file block to {output}.

        @param clans_instance: the source of the data to be serialized
        @type clans_instance: a L{Clans} instance
        """
        self.write(clans_instance.params._to_clans_param_block())

    def add_rotmtx_block(self, clans_instance):
        """
        Appends a <rotmtx>data</rotmtx> CLANS file block to {output}.
        Nothing is written if the rotation matrix is None.

        @param clans_instance: the source of the data to be serialized
        @type clans_instance: a L{Clans} instance

        @raise ValueError: if clans_instance.rotmtx is no 3x3 numpy.array
        """
        rotmtx = clans_instance.rotmtx

        if rotmtx is None:
            return

        if rotmtx.shape != (3, 3):
            raise ValueError('rotmtx must be a 3x3 array')

        rows = []
        for row_index in range(3):
            rows.append('{0};{1};{2};'.format(*tuple(rotmtx[row_index])))

        self.writeline('<rotmtx>')
        self.write('\n'.join(rows))
        self.write('\n')
        self.writeline('</rotmtx>')

    def add_seq_block(self, clans_instance):
        """
        Appends a <seq>data</seq> CLANS file block to {output}.

        @param clans_instance: the source of the data to be serialized
        @type clans_instance: a L{Clans} instance
        """
        sequences = [entry.output_string_seq()
                     for entry in clans_instance.entries]

        self.writeline('<seq>')
        self.write(''.join(sequences))
        self.writeline('</seq>')

    def add_seqgroups_block(self, clans_instance):
        """
        Appends a <seqgroups>data</seqgroups> CLANS file block to {output}.
        Nothing is written if there are no seqgroups.

        @param clans_instance: the source of the data to be serialized
        @type clans_instance: a L{Clans} instance
        """
        seqgroups = clans_instance.seqgroups

        if seqgroups is None or not len(seqgroups) > 0:
            return

        group_strings = [group.output_string() for group in seqgroups]

        self.writeline('<seqgroups>')
        self.write('\n'.join(group_strings))
        self.write('\n')
        self.writeline('</seqgroups>')

    def add_pos_block(self, clans_instance):
        """
        Appends a <pos>data</pos> CLANS file block to {output}.

        @param clans_instance: the source of the data to be serialized
        @type clans_instance: a L{Clans} instance
        """
        positions = [entry.output_string_pos()
                     for entry in clans_instance.entries]

        self.writeline('<pos>')
        self.write('\n'.join(positions))
        self.write('\n')
        self.writeline('</pos>')

    def add_hsp_block(self, clans_instance):
        """
        Appends a <hsp>data</hsp> CLANS file block to {output}.
        If the CLANS instance has hsp_att_mode=="att" we add a <att>data</att>
        block instead, whose lines use a space in place of the colon.

        Each connection is written once, for the entry with the lower id.

        @param clans_instance: the source of the data to be serialized
        @type clans_instance: a L{Clans} instance
        """
        mode = clans_instance._hsp_att_mode

        if mode == "att":
            line_format = '{0} {1} {2}\n'
        else:
            line_format = '{0} {1}:{2}\n'

        self.writeline('<{0}>'.format(mode))

        ## sorting is not necessary, but makes a nicer looking clans file
        entries_by_id = sorted(
            [(entry.get_id(), entry) for entry in clans_instance.entries],
            key=operator.itemgetter(0))
        id_of_entry = dict((entry, identifier)
                           for (identifier, entry) in entries_by_id)

        for (source_id, source) in entries_by_id:

            ## sort list of hsp targets by id
            targets = sorted(
                [(id_of_entry[target], pvalue)
                 for (target, pvalue) in source.hsp.items()],
                key=operator.itemgetter(0))

            for (target_id, pvalue) in targets:
                if source_id < target_id:
                    self.write(line_format.format(
                        source_id, target_id, repr(pvalue)))

        self.writeline('</{0}>'.format(mode))
716
class ClansEntryGiComparator(object):
    """
    Comparator for two L{ClansEntry}s.
    Comparison is based on \'gi|\' numbers and residue ranges parsed from
    L{ClansEntry}.name attributes if they can be parsed from it. Otherwise
    the complete name is used.

    Calling the comparator with two entries returns True if they are
    considered identical and False otherwise.

    @raise ValueError: if a residue range contains no terminal residue
    """

    def __init__(self):
        self._mapping = {}  # mapping cache for faster access

    def __call__(self, entry1, entry2):
        """
        Compare two entries by their parsed names.

        @param entry1: first entry; only its C{name} attribute is used
        @param entry2: second entry; only its C{name} attribute is used

        @rtype: bool
        @return: True if the parsed names are equal or if both names have
                 the same gi number and sufficiently overlapping residue
                 ranges, False otherwise
        """
        entry1_parsed = self._cached_parse(entry1.name)
        entry2_parsed = self._cached_parse(entry2.name)

        if entry1_parsed == entry2_parsed:
            return True

        ## ranges can only be compared for (gi, start, end) tuples. Testing
        ## len(...) == 3 is not sufficient: it is also True for a plain gi
        ## number (or name) that consists of 3 characters.
        if not (isinstance(entry1_parsed, tuple)
                and isinstance(entry2_parsed, tuple)):
            return False

        A = dict(zip(('gi', 'start', 'end'), entry1_parsed))
        B = dict(zip(('gi', 'start', 'end'), entry2_parsed))

        if A['gi'] != B['gi']:  # different gi numbers
            return False

        ## switch so that A is the one that starts earlier
        if A['start'] > B['start']:
            A, B = B, A

        common_residues = A['end'] - B['start']
        if common_residues < 0:
            return False  # B starts after A ends

        if B['end'] < A['end']:
            return True  # A starts before B and ends after it => B is in A

        ## > 75% of length of the shorter one are shared => identical
        shorter_length = min(A['end'] - A['start'], B['end'] - B['start'])
        return common_residues > 0.75 * shorter_length

    def _cached_parse(self, name):
        """
        Return the parsed form of C{name}; every name is parsed only once.
        """
        if name not in self._mapping:
            self._mapping[name] = self._parse_entry_name(name)
        return self._mapping[name]

    def _parse_entry_name(self, name):
        """
        Extract the gi number and, if present, the residue range from an
        entry name such as \'gi|123|...(10-50:200)\' or \'gi|123|...(10-50)\'.

        @param name: the entry name
        @type name: str

        @return: C{name} itself if it contains no \'gi|\'; the gi number
                 (str) if no residue range can be parsed; otherwise the
                 3-tuple (gi number, first residue, last residue)

        @raise ValueError: if a residue range contains no terminal residue
        """
        start = name.find('gi|')
        if start == -1:
            return name

        name = name[start + 3:]
        gi_number = name.split('|', 1)[0]

        ## ignore everything from the next gi entry on. The search runs on
        ## the already shortened name, so no further offset is needed.
        next_gi_start = name.find('gi|')
        if next_gi_start != -1:
            name = name[:next_gi_start]

        range_start = name.find('(')
        if range_start == -1:
            return gi_number

        residues = name[range_start + 1:]

        ## if start is no integer, assume '(' is not the start of a range
        try:
            int(residues.split('-')[0])
        except ValueError:
            return gi_number

        ## the range ends at ':' in (x-y:z) and at ')' in (x-y); both are
        ## searched behind the '(' and the first one found is used
        terminators = [index for index in (residues.find(':'),
                                           residues.find(')'))
                       if index != -1]
        if not terminators:
            raise ValueError(
                'no end residue found in name\n\t{0}'.format(name))

        potential_start_and_end = residues[:min(terminators)].split('-')

        if len(potential_start_and_end) != 2:
            return gi_number
        try:
            first_res, last_res = [int(val) for val in potential_start_and_end]
        except ValueError:
            return gi_number

        return (gi_number, first_res, last_res)
815
816 817 -class ClansParams(object):
818 """ 819 Class for handling L{Clans} parameters. 820 See L{ClansParams}._DEFAULTS for accepted parameter names. 821 822 @kwparam **kw: parameters as C{kw[parameter_name] = parameter_value} 823 824 @raise KeyError: if a supplied parameter name is not known 825 (i.e. it is not a key in _DEFAULTS) 826 """ 827 828 _DEFAULTS = {'attfactor': 10.0, 829 'attvalpow': 1, 830 'avgfoldchange': False, 831 'blastpath': 'blastall -p blastp', 832 'cluster2d': False, 833 'colors': {0.0: (230, 230, 230), 834 0.1: (207, 207, 207), 835 0.2: (184, 184, 184), 836 0.3: (161, 161, 161), 837 0.4: (138, 138, 138), 838 0.5: (115, 115, 115), 839 0.6: (92, 92, 92), 840 0.7: (69, 69, 69), 841 0.8: (46, 46, 46), 842 0.9: (23, 23, 23)}, 843 'complexatt': True, 844 'cooling': 1.0, 845 'currcool': 1.0, 846 'dampening': 0.2, 847 'dotsize': 2, 848 'formatdbpath': 'formatdb', 849 'groupsize': 4, 850 'maxmove': 0.1, 851 'minattract': 1.0, 852 'ovalsize': 10, 853 'pval': 1.0, 854 'repfactor': 5.0, 855 'repvalpow': 1, 856 'showinfo': True, 857 'usefoldchange': False, 858 'usescval': False, 859 'zoom': 1.0} 860
861 - def __init__(self, strict=True, **kw):
862 self.set_default_params() 863 864 for param_name, param_value in kw.items(): 865 if param_name not in self._DEFAULTS and strict: 866 raise KeyError('parameter {0} (value: {1}) unknown'.format( 867 param_name, param_value)) 868 self.__setattr__(param_name, param_value)
869 870 @property
871 - def complexatt(self):
872 """ 873 if True, complex attraction computations are used. 874 875 raises ValueError if set to non-boolean value 876 877 @rtype: bool 878 """ 879 return self._complexatt
880 881 @complexatt.setter
882 - def complexatt(self, value):
883 if not isinstance(value, bool): 884 raise ValueError(('complexatt cannot be {0} (accepted values: True' 885 + '/False)').format(value)) 886 self._complexatt = value
887 888 @property
889 - def attfactor(self):
890 """ 891 factor in the attractive force 892 893 raises ValueError if C{value} is not castable to float 894 895 @rtype: float 896 """ 897 return self._attfactor
898 899 @attfactor.setter
900 - def attfactor(self, value):
901 self._attfactor = float(value)
902 903 @property
904 - def attvalpow(self):
905 """ 906 exponent in the attractive force 907 908 raises ValueError if C{value} is not castable to float 909 910 @rtype: float 911 """ 912 return self._attvalpow
913 914 @attvalpow.setter
915 - def attvalpow(self, value):
916 self._attvalpow = float(value)
917 918 @property
919 - def repfactor(self):
920 """ 921 factor in the repulsive force 922 923 raises ValueError if C{value} is not castable to float 924 925 @rtype: float 926 """ 927 return self._repfactor
928 929 @repfactor.setter
930 - def repfactor(self, value):
931 self._repfactor = float(value)
932 933 @property
934 - def repvalpow(self):
935 """ 936 exponent in the repulsive force 937 938 raises ValueError if C{value} is not castable to float 939 940 @rtype: float 941 """ 942 return self._repvalpow
943 944 @repvalpow.setter
945 - def repvalpow(self, value):
946 self._repvalpow = float(value)
947 948 @property
949 - def cluster2d(self):
950 """ 951 if True, clustering is done in 2D. Else in 3D. 952 953 raises ValueError if set to non-boolean value 954 955 @rtype: bool 956 """ 957 return self._cluster2d
958 959 960 @cluster2d.setter
961 - def cluster2d(self, value):
962 if not isinstance(value, bool): 963 raise ValueError(('cluster2d cannot be {0} (accepted values: True' 964 + '/False)').format(value)) 965 966 self._cluster2d = value
967 968 @property
969 - def pval(self):
970 """ 971 p-value cutoff that determines which connections are considered for 972 the attractive force 973 974 raises ValueError if C{value} is not castable to float 975 976 @rtype: float 977 """ 978 return self._pval
979 980 @pval.setter
981 - def pval(self, value):
982 self._pval = float(value)
983 984 @property
985 - def maxmove(self):
986 """ 987 maximal sequence (i.e. dot in the clustermap) movement per round 988 989 raises ValueError if C{value} is not castable to float 990 991 @rtype: float 992 """ 993 return self._maxmove
994 995 @maxmove.setter
996 - def maxmove(self, value):
997 self._maxmove = float(value)
998 999 @property
1000 - def usescval(self):
1001 """ 1002 parameter with unclear function. Check in Clans. 1003 1004 raises ValueError if set to non-boolean value 1005 1006 @rtype: bool 1007 """ 1008 return self._usescval
1009 1010 @usescval.setter
1011 - def usescval(self, value):
1012 if not isinstance(value, bool): 1013 raise ValueError(('usescval cannot be {0} (accepted values: True' 1014 + '/False)').format(value)) 1015 1016 self._usescval = value
1017 1018 @property
1019 - def cooling(self):
1020 """ 1021 parameter with unclear function. Check in Clans. 1022 1023 raises ValueError if C{value} is not castable to float 1024 1025 @rtype: float 1026 """ 1027 return self._cooling
1028 1029 @cooling.setter
1030 - def cooling(self, value):
1031 self._cooling = float(value)
1032 1033 @property
1034 - def currcool(self):
1035 """ 1036 parameter with unclear function. Check in Clans. 1037 1038 raises ValueError if C{value} is not castable to float 1039 1040 @rtype: float 1041 """ 1042 return self._currcool
1043 1044 @currcool.setter
1045 - def currcool(self, value):
1046 self._currcool = float(value)
1047 1048 @property
1049 - def dampening(self):
1050 """ 1051 parameter with unclear function. Check in Clans. 1052 1053 raises ValueError if C{value} is not castable to float 1054 1055 @rtype: float 1056 """ 1057 return self._dampening
1058 1059 @dampening.setter
1060 - def dampening(self, value):
1061 self._dampening = float(value)
1062 1063 @property
1064 - def minattract(self):
1065 """ 1066 parameter with unclear function. Check in Clans. 1067 1068 raises ValueError if C{value} is not castable to float 1069 1070 @rtype: float 1071 """ 1072 return self._minattract
1073 1074 @minattract.setter
1075 - def minattract(self, value):
1076 self._minattract = float(value)
1077 1078 @property
1079 - def blastpath(self):
1080 """ 1081 path to the BLAST executable for protein-protein comparisons. BLAST+ is 1082 currently not supported by Clans. 1083 1084 raises ValueError if C{value} is not a string 1085 1086 @rtype: str 1087 """ 1088 return self._blastpath
1089 1090 @blastpath.setter
1091 - def blastpath(self, value):
1092 if not isinstance(value, csb.core.string): 1093 raise ValueError(('blastpath cannot be {0} (accepted values: ' 1094 + 'strings)').format(value)) 1095 1096 self._blastpath = value
1097 1098 @property
1099 - def formatdbpath(self):
1100 """ 1101 path to the formatdb executable of BLAST. 1102 1103 raises ValueError if C{value} is not a string 1104 1105 @rtype: str 1106 """ 1107 return self._formatdbpath
1108 1109 @formatdbpath.setter
1110 - def formatdbpath(self, value):
1111 if not isinstance(value, csb.core.string): 1112 raise ValueError(('formatdbpath cannot be {0} (accepted values: ' 1113 + 'strings)').format(value)) 1114 1115 self._formatdbpath = value
1116 1117 @property
1118 - def showinfo(self):
1119 """ 1120 if True, additional data (rotation matrix) is shown in the clustring 1121 window) 1122 1123 raises ValueError if set to non-boolean value 1124 1125 @rtype: bool 1126 """ 1127 return self._showinfo
1128 1129 @showinfo.setter
1130 - def showinfo(self, value):
1131 if not isinstance(value, bool): 1132 raise ValueError(('showinfo cannot be {0} (accepted values: True' 1133 + '/False)').format(value)) 1134 1135 self._showinfo = value
1136 1137 @property
1138 - def zoom(self):
1139 """ 1140 zoom value (1.0 == not zoomed) 1141 1142 raises ValueError if C{value} is not castable to float 1143 1144 @rtype: float 1145 """ 1146 return self._zoom
1147 1148 @zoom.setter
1149 - def zoom(self, value):
1150 self._zoom = float(value)
1151 1152 @property
1153 - def dotsize(self):
1154 """ 1155 size of the central dot representing each sequence in the clustermap 1156 1157 raises ValueError if C{value} is not castable to int 1158 1159 @rtype: int 1160 """ 1161 return self._dotsize
1162 1163 @dotsize.setter
1164 - def dotsize(self, value):
1165 self._dotsize = int(value)
1166 1167 @property
1168 - def ovalsize(self):
1169 """ 1170 size of the circle around selected sequences 1171 1172 raises ValueError if value not castable to int 1173 1174 @rtype: int 1175 """ 1176 return self._ovalsize
1177 1178 @ovalsize.setter
1179 - def ovalsize(self, value):
1180 self._ovalsize = int(value)
1181 1182 @property
1183 - def groupsize(self):
1184 """ 1185 default for the size of circles that mark newly created groups 1186 1187 raises ValueError if C{value} is not castable to int 1188 1189 @rtype: int 1190 """ 1191 return self._groupsize
1192 1193 @groupsize.setter
1194 - def groupsize(self, value):
1195 self._groupsize = int(value)
@property
def usefoldchange(self):
    """
    CLANS parameter whose function is unclear; check in CLANS itself.

    Assigning anything other than a bool raises ValueError.

    @rtype: bool
    """
    current = self._usefoldchange
    return current

@usefoldchange.setter
def usefoldchange(self, value):
    # Only genuine booleans are accepted; 0/1 and other stand-ins are rejected.
    if isinstance(value, bool):
        self._usefoldchange = value
        return

    raise ValueError(
        'usefoldchange cannot be {0} (accepted values: True/False)'.format(
            value))
@property
def avgfoldchange(self):
    """
    CLANS parameter whose function is unclear; check in CLANS itself.

    Assigning anything other than a bool raises ValueError.

    @rtype: bool
    """
    current = self._avgfoldchange
    return current

@avgfoldchange.setter
def avgfoldchange(self, value):
    # Only genuine booleans are accepted; 0/1 and other stand-ins are rejected.
    if isinstance(value, bool):
        self._avgfoldchange = value
        return

    raise ValueError(
        'avgfoldchange cannot be {0} (accepted values: True/False)'.format(
            value))
@property
def colors(self):
    """
    Mapping of cutoff values to the colors used for the different
    p-values/attractions.

    Assigning anything other than a dict raises ValueError.

    @rtype: dict
    """
    mapping = self._colors
    return mapping

@colors.setter
def colors(self, value):
    # The mapping is stored as given (no copy, no per-item validation).
    if isinstance(value, dict):
        self._colors = value
        return

    raise ValueError('colors must be a dict')
1251
def set_default_params(self):
    """
    Resets every parameter to its CLANS default value.
    See L{ClansParams}._DEFAULTS.
    """
    # Plain parameters are assigned by name, so any validating property
    # setter of the same name is applied to the default value as well.
    for param_name, default in self._DEFAULTS.items():
        if param_name != 'colors':
            setattr(self, param_name, default)

    # 'colors' is handled separately: each default entry is expanded into a
    # Color instance and stored under its key.
    self._colors = {}
    for cutoff, rgb in ClansParams._DEFAULTS['colors'].items():
        self.colors[cutoff] = Color(*rgb)
1266
def _to_clans_param_block(self):
    """
    Serializes the L{ClansParams} values into a CLANS param block.

    @return: a CLANS file format <param>[data]</param> block
    @rtype: str
    """
    # parameters that are written as the literals 'true' / 'false'
    boolean_params = ('avgfoldchange', 'cluster2d', 'complexatt',
                      'showinfo', 'usefoldchange', 'usescval')

    rendered = {}

    for key in sorted(ClansParams._DEFAULTS):
        if key == 'colors':
            ## 'colors' is split into 'colorcutoffs' and 'colorarr', both
            ## listed in ascending cutoff order
            ordered = sorted(self.colors)
            rendered['colorcutoffs'] = ''.join(
                '{0:.2f};'.format(cutoff) for cutoff in ordered)
            rendered['colorarr'] = ''.join(
                '({0}):'.format(self.colors[cutoff].to_clans_color())
                for cutoff in ordered)
        elif key in boolean_params:
            # the bool value indexes the pair of literals
            rendered[key] = ['false', 'true'][getattr(self, key)]
        else:
            rendered[key] = getattr(self, key)

    lines = ['{0}={1}'.format(key, rendered[key]) for key in sorted(rendered)]

    return '<param>\n' + '\n'.join(lines) + '\n</param>\n'
1306
class ClansEntryCollection(csb.core.ReadOnlyCollectionContainer):
    """
    Read-only container that holds (and internally manages) L{ClansEntry}
    instances.
    """

    def __init__(self):
        super(ClansEntryCollection, self).__init__(type=ClansEntry)

    def _remove_item(self, item):
        """
        Removes {item} from the collection.

        @param item: the item to be removed
        @type item: a L{ClansEntry} instance

        @raises TypeError: if {item} is not a L{ClansEntry} instance
        """
        required = self._type

        if required and not isinstance(item, required):
            raise TypeError(
                "Item {0} is not of the required {1} type.".format(
                    item, required.__name__))

        self._items.remove(item)

    def _sort(self):
        """
        Sorts the entries in place by their {name}.

        Note: If the L{ClansEntryCollection} is part of a L{Clans} instance,
        use L{Clans.sort} instead of this to avoid corrupting L{Clans._idx}.
        """
        self._items.sort(key=operator.attrgetter('name'))
1342
class ClansSeqgroupCollection(csb.core.ReadOnlyCollectionContainer):
    """
    Read-only container for holding (and internally managing) L{ClansSeqgroup}
    instances.
    """

    def __init__(self):
        # NOTE(review): the constructor body is missing from this listing
        # (a def without a body is a syntax error). It is restored here by
        # analogy with L{ClansEntryCollection}.__init__ -- confirm against
        # the original module.
        super(ClansSeqgroupCollection, self).__init__(type=ClansSeqgroup)

    def _remove_item(self, item):
        """
        Removes {item} from the collection.

        @param item: the item to be removed
        @type item: a L{ClansSeqgroup} instance

        @raises TypeError: if {item} is not a L{ClansSeqgroup} instance
        """
        # The type check only applies if the container was given a type.
        if self._type:
            if not isinstance(item, self._type):
                raise TypeError(
                    "Item {0} is not of the required {1} type.".format(
                        item, self._type.__name__))

        self._items.remove(item)
1369
class Clans(object):
    """
    Class for holding and manipulating data from one CLANS file.
    Initialization is always done as empty clustermap with default parameters.
    """

    def __init__(self):
        self._filename = None

        self._params = ClansParams()

        self._rotmtx = None
        self.set_default_rotmtx()

        self._hsp_att_mode = "hsp"

        self._entries = ClansEntryCollection()
        self._seqgroups = ClansSeqgroupCollection()

        # Index dict for fast access to entry positions; it is rebuilt lazily
        # by _update_index() whenever _has_good_index is False.
        self._idx = None
        self._has_good_index = False

    def __repr__(self):
        return 'Clans object: {0} sequences; {1} seqgroups'.format(
            len(self), len(self.seqgroups))

    __str__ = __repr__

    def __len__(self):
        return len(self.entries)

    def __getitem__(self, index):
        return self.entries[index]

    def __setitem__(self, index, data):
        self.entries[index] = data
        # positions may have changed, so the index must be rebuilt
        self._has_good_index = False

    @property
    def filename(self):
        """
        file from which the data was parsed

        @rtype: str or None
        """
        return self._filename

    @property
    def params(self):
        """
        L{ClansParams} that contains the parameters set for this L{Clans}
        instance.

        @rtype: L{ClansParams}
        """
        return self._params

    @property
    def rotmtx(self):
        """
        3x3 rotation matrix that indicates the rotation state of the clustermap

        raises ValueError if rotation matrix shape is not 3x3

        @rtype: numpy.array
        """
        return self._rotmtx

    @rotmtx.setter
    def rotmtx(self, value):
        if value.shape != (3, 3):
            raise ValueError('rotation matrix needs to be a 3x3 numpy array')
        self._rotmtx = value

    @property
    def entries(self):
        """
        collection of the clustermap's L{ClansEntry}s.

        @rtype: L{ClansEntryCollection}
        """
        return self._entries

    @property
    def seqgroups(self):
        """
        collection of the L{ClansSeqgroup}s defined in the clustermap.

        @rtype: L{ClansSeqgroupCollection}
        """
        return self._seqgroups

    def set_default_rotmtx(self):
        """
        Resets the rotation matrix (rotmtx) to no rotation.
        """
        self.rotmtx = eye(3)

    def _update_index(self):
        """
        Creates an index of L{ClansEntry}s to their position in the L{Clans}
        instance.

        The index is used to allow for fast access via L{ClansEntry.get_id} and
        was introduced to get a better L{Clans}.write() performance, which
        suffered from excessive entry.get_id() calls during HSP block generation
        (see L{ClansFileWriter.add_hsp_block}).

        @attention: the index needs unique entry names. This is ensured with a
        call to L{Clans.remove_duplicates} and can decrease the number of
        entries!
        """
        self.remove_duplicates()

        self._idx = dict((entry._get_unique_id(), position)
                         for position, entry in enumerate(self.entries))
        self._has_good_index = True

    def sort(self):
        """
        Sorts the L{ClansEntry}s by their {name}.
        """
        self._entries._sort()

        self._has_good_index = False

    def add_group(self, group, members=None):
        """
        Adds a new group.

        @param group: the new group
        @type group: L{ClansSeqgroup} instance

        @param members: L{ClansEntry} instances to be in the new group
        @type members: list

        @note: the type of C{group} is checked by the seqgroup collection's
               C{_append_item} (presumably raising TypeError -- confirm).
        """
        self.seqgroups._append_item(group)

        if members is not None:
            for member in members:
                group.add(member)

    def remove_group(self, group):
        """
        Removes a group and detaches all of its members from it.

        @param group: the group to be removed
        @type group: L{ClansSeqgroup} instance
        """
        self.seqgroups._remove_item(group)

        # group.remove() mutates group.members, so iterate over a snapshot;
        # iterating the live list skips every second member.
        for member in list(group.members):
            group.remove(member)

    def add_entry(self, entry):
        """
        Adds an new entry.

        @param entry: the new entry
        @type entry: L{ClansEntry} instance

        @raise ValueError: if C{entry} is no L{ClansEntry} instance
        """
        if not isinstance(entry, ClansEntry):
            raise ValueError('entries need to be L{ClansEntry} instances')

        self.entries._append_item(entry)
        entry._parent = self

        self._has_good_index = False

    def remove_entry_by_name(self, entry_name):
        """
        Removes an entry fetched by its name.

        @param entry_name: name of the entry that shall be removed
        @type entry_name: string

        @raise ValueError: if no entry or more than one entry has that name
        """
        entry = self.get_entry(entry_name, True)

        self.remove_entry(entry)

    def remove_entry(self, entry):
        """
        Removes an entry together with its HSPs and group memberships.
        Seqgroups that are empty afterwards are removed as well.

        @param entry: the entry that shall be removed
        @type entry: L{ClansEntry} instance
        """
        # remove_hsp() pops from entry.hsp, so iterate over a snapshot of the
        # keys; iterating the live dict raises RuntimeError on Python 3.
        for other_entry in list(entry.hsp):
            other_entry.remove_hsp(entry)

        # g.remove() mutates entry.groups, so iterate over a snapshot;
        # iterating the live list skips every second group.
        for g in list(entry.groups):
            g.remove(entry)

        remove_groups = [g for g in self.seqgroups if g.is_empty()]
        for g in remove_groups:
            self.seqgroups._remove_item(g)

        self.entries._remove_item(entry)
        self._has_good_index = False

    def get_entry(self, name, pedantic=True):
        """
        Checks if an entry with name C{name} exists and returns it.

        @param name: name of the sought entry
        @type name: str

        @param pedantic: If True, a ValueError is raised if multiple entries
                         with name name are found. If False, returns the first
                         one.
        @type pedantic: bool

        @raise ValueError: if no entry with name C{name} is found
        @raise ValueError: if multiple entries with name C{name} are found and
        C{pedantic == True}

        @rtype: L{ClansEntry}
        @return: entry with name C{name}
        """
        hits = [e for e in self.entries if e.name == name]

        if not hits:
            raise ValueError('ClansEntry {0} does not exist.'.format(name))

        if len(hits) > 1 and pedantic:
            raise ValueError(
                'multiple entries have name \'{0}\''.format(name))

        return hits[0]

    def remove_duplicates(self, identity_function=None):
        """
        Determines and removes duplicates using C{identity_function}.

        Every entry is compared with all entries that follow it; of each
        matching pair, the later entry is removed.

        @param identity_function: callable to compare two L{ClansEntry}s as
        parameters. Defaults to L{ClansEntryGiComparator}.
        @type identity_function: callable

        @return: the removed entries
        @rtype: list of L{ClansEntry}s
        """
        if identity_function is None:
            identity_function = ClansEntryGiComparator()

        remove_us = list(set(
            e2
            for i, e in enumerate(self.entries)
            for e2 in self.entries[i + 1:]
            if identity_function(e, e2)))

        for e in remove_us:
            self.remove_entry(e)

        return remove_us

    def restrict_to_max_pvalue(self, cutoff):
        """
        Removes all L{ClansEntry}s that have no HSP with a value below
        C{cutoff}, i.e. entries without any HSP or whose smallest HSP value
        is >= C{cutoff}. This is repeated until no further entry qualifies,
        because removing an entry also removes its HSPs from other entries.

        @param cutoff: the cutoff
        @type cutoff: float

        @return: the removed entries
        @rtype: list of L{ClansEntry}s
        """
        removed_entries = []  # all removed entries go here

        ## loop to hit entries that have no HSPs left after the previous round
        while True:
            remove_us = []  # entries removed this round
            for entry in self.entries:
                hsp_values = entry.hsp.values()
                if len(hsp_values) == 0 or min(hsp_values) >= cutoff:
                    remove_us.append(entry)

            if not remove_us:
                break

            removed_entries.extend(remove_us)

            for e in remove_us:
                if e in self:
                    self.remove_entry(e)

        return removed_entries

    def restrict(self, keep_names):
        """
        Removes all entries whose name is not in keep_names

        @param keep_names: names of entries that shall be kept
        @type keep_names: iterable
        """
        # collect first, then remove: remove_entry() changes self.entries
        unwanted = [e for e in self.entries if e.name not in keep_names]

        for entry in unwanted:
            self.remove_entry(entry)

    def write(self, filename):
        """
        writes the L{Clans} instance to a file in CLANS format

        @param filename: the target file's name
        @type filename: str
        """
        with open(filename, 'w') as stream:
            writer = ClansFileWriter(stream)
            writer.serialize(self)
1675
class ClansEntry(object):
    """
    Data of a single CLANS sequence entry.

    @param name: the entry name
    @type name: str

    @param seq: the entry's amino acid sequence
    @type seq: str

    @param coords: coordinates in 3D space; random coordinates are drawn if
                   omitted
    @type coords: iterable with 3 items

    @param parent: parent of this entry
    @type parent: L{Clans} instance
    """

    def __init__(self, name=None, seq='', coords=None, parent=None):
        # The initial values are stored directly; the validating property
        # setters only apply to later assignments.
        self._name = name
        self._seq = seq

        # each CLANS coord is -1.<x<1.
        self._coords = random.random(3) * 2 - 1 if coords is None else coords

        self._parent = parent

        self._groups = []
        self._hsp = {}

    def __repr__(self):
        position = 'NoCoordsSet'
        if self.coords is not None:
            position = '({0:.2f}, {1:.2f}, {2:.2f})'.format(*self.coords)

        if len(self.groups) > 0:
            membership = 'groups: {0}'.format(
                ', '.join(group.name for group in self.groups))
        else:
            membership = 'not in a group'

        return 'ClansEntry "{0}": {1} '.format(
            self.name, position + '; ' + membership)

    @property
    def name(self):
        """
        name of the entry

        raises ValueError if set to a non-string value

        @rtype: string
        """
        return self._name

    @name.setter
    def name(self, value):
        if isinstance(value, csb.core.string):
            self._name = value
            return

        raise ValueError(
            'name cannot be {0} (accepted values: strings)'.format(value))

    @property
    def seq(self):
        """
        protein sequence of the entry

        raises ValueError if set to a non-string value

        @rtype: string
        """
        return self._seq

    @seq.setter
    def seq(self, value):
        if isinstance(value, csb.core.string):
            self._seq = value
            return

        raise ValueError(
            'seq cannot be {0} (accepted values: strings)'.format(value))

    @property
    def coords(self):
        """
        entry coordinates in 3D space

        raises ValueError if set to something that does not have 3 items

        @rtype: iterable with 3 items
        """
        return self._coords

    @coords.setter
    def coords(self, value):
        if len(value) == 3:
            self._coords = value
            return

        raise ValueError(
            'coords cannot be {0} (accepted values: iteratables with 3 items)'
            .format(value))

    @property
    def parent(self):
        """
        L{Clans} instance that parents this L{ClansEntry}

        @rtype: L{Clans}
        """
        return self._parent

    @property
    def groups(self):
        """
        L{ClansSeqgroup}s that contain the entry

        @rtype: list
        """
        return self._groups

    @property
    def hsp(self):
        """
        connections between this and other L{ClansEntry}s, stored as a
        mapping of the other entry to the HSP value

        @rtype: dict
        """
        return self._hsp

    def get_id(self):
        """
        Returns the id of the current entry, i.e. the value stored for it in
        the parent's index.

        Note: the first call to this method triggers L{Clans._update_index},
        which will make it appear slower than successive calls.

        @rtype: int
        @return: the entry's id is returned unless it has no parent in which
                 case -1 is returned
        """
        owner = self.parent

        if owner is None:
            return -1

        # rebuild the parent's index lazily if it is stale
        if not owner._has_good_index:
            owner._update_index()

        return owner._idx[self._get_unique_id()]

    def _get_unique_id(self):
        """
        Returns a >>more<< unique ID (however this is not guaranteed to be
        really unique) than get_id. This ID is the key under which the entry
        is stored in the parent's index.

        @rtype: str
        @return: a more or less unique id
        """
        unique_id = self.name + '<###>' + self.seq
        return unique_id

    def add_hsp(self, other, value):
        """
        Creates an HSP between self and other with the given value; it is
        registered on both entries.

        @param other: the other entry
        @type other: L{ClansEntry} instance

        @param value: the value of the HSP
        @type value: float
        """
        for source, target in ((self, other), (other, self)):
            source.hsp[target] = value

    def remove_hsp(self, other):
        """
        Removes the HSP between C{self} and C{other} from both entries; if
        none exists, does nothing.

        @param other: the other entry
        @type other: L{ClansEntry} instance
        """
        self.hsp.pop(other, None)
        other.hsp.pop(self, None)

    def output_string_seq(self):
        """
        Creates the CLANS <seq> block format representation of the entry.

        @rtype: str
        @return: the entry's representation in CLANS <seq> block format
        """
        return '>{0}\n{1}\n'.format(self.name, self.seq)

    def output_string_pos(self):
        """
        Create the CLANS <pos> block format representation of the entry.

        @rtype: str
        @return: the entry's representation in CLANS <pos> block format
        """
        return '{0} {1:.8f} {2:.8f} {3:.8f}'.format(
            self.get_id(), *self.coords)

    def output_string_hsp(self):
        """
        Creates the CLANS <hsp> block format representation of the entry,
        one line per HSP.

        @rtype: str
        @return: the entry's representation in CLANS <hsp> block format
        """
        return '\n'.join(
            '{0} {1}:{2:.8f}'.format(self.get_id(), partner.get_id(), score)
            for partner, score in self.hsp.items())
1896
class ClansSeqgroup(object):
    """
    Data of a single CLANS group (seqgroup).

    @kwparam name: name of the seqgroup; default: 'NO NAME'
    @type name: string

    @kwparam type: symbol used to represent the seqgroup in the graphical
                   output; default: 0
    @type type: int

    @kwparam size: size of the symbol used to represent the seqgroup in the
                   graphical output; default: 4
    @type size: int

    @kwparam hide: if True, the seqgroup's symbols in the graphical output are
                   not drawn; default: False
    @type hide: bool

    @kwparam color: color of the seqgroup; default: (255, 255, 255)
    @type color: L{Color}, string formatted like 'x;y;z' or 3-item iterable

    @kwparam members: list of members of this seqgroup
    @type members: list
    """

    def __init__(self, **kw):
        # Every attribute is first initialised to None and then assigned via
        # its property, so that the validating setters are applied.
        defaults = (('name', 'NO NAME'),
                    ('type', 0),
                    ('size', 4),
                    ('hide', False),
                    ('color', (255, 255, 255)))

        for attribute, default in defaults:
            setattr(self, '_' + attribute, None)
            setattr(self, attribute, kw.pop(attribute, default))

        self._members = []
        for member in kw.get('members', ()):
            self.add(member)

    def __repr__(self):
        template = ('ClansSeqgroup {0.name}: type: {0.type}; size: {0.size}; '
                    'hide: {0.hide}; color: {1}; #members: {2}')
        return template.format(
            self, self.color.to_clans_color(), len(self.members))

    def __len__(self):
        return len(self.members)

    @property
    def name(self):
        """
        name of the seqgroup

        raises ValueError if set to a non-string value

        @rtype: string
        """
        return self._name

    @name.setter
    def name(self, value):
        if isinstance(value, csb.core.string):
            self._name = value
            return

        raise ValueError('name must be a string')

    @property
    def type(self):
        """
        symbol used to represent the seqgroup in the graphical output

        raises ValueError if set to a value that is not castable to int

        @rtype: int
        """
        return self._type

    @type.setter
    def type(self, value):
        converted = int(value)
        self._type = converted

    @property
    def size(self):
        """
        size of the symbol used to represent the seqgroup in the graphical
        output

        raises ValueError if set to a value that is not castable to int

        @rtype: int
        """
        return self._size

    @size.setter
    def size(self, value):
        converted = int(value)
        self._size = converted

    @property
    def hide(self):
        """
        if True, the seqgroup's symbols in the graphical output are not drawn

        raises ValueError if set to a non-boolean value

        @rtype: bool
        """
        return self._hide

    @hide.setter
    def hide(self, value):
        if isinstance(value, bool):
            self._hide = value
            return

        raise ValueError(
            'hide cannot be {0} (accepted values: True/False)'.format(value))

    @property
    def color(self):
        """
        color of the seqgroup

        Accepts a L{Color} instance, a string formatted like '{r};{g};{b}'
        or a 3-item iterable of values castable to int.

        raises ValueError if set to a wrongly formatted string (correct:
        '{r};{g};{b}') or to a sized value that does not have 3 items

        @rtype: L{Color}
        """
        return self._color

    @color.setter
    def color(self, value, separator=';'):
        # NOTE: C{separator} is not used; strings are parsed with the default
        # separator of Color.from_string.
        if isinstance(value, Color):
            # existing Color instances are stored as they are
            parsed = value
        elif isinstance(value, csb.core.string):
            ## color given as string in format 'r;g;b'
            parsed = Color.from_string(value)
        elif len(value) == 3:
            # 3-item iterables like (3, 5, 6)
            parsed = Color(*tuple(map(int, value)))
        else:
            raise ValueError('cannot parse color from \'{0}\''.format(value))

        self._color = parsed

    @property
    def members(self):
        """
        the members of this seqgroup

        @rtype: list
        """
        return self._members

    def is_empty(self):
        """
        Checks if the group contains entries.

        @rtype: bool
        @return: True if the group contains no entries, else False.
        """
        return len(self) == 0

    def add(self, new_member):
        """
        Adds entry C{new_member} to this L{ClansSeqgroup} and registers the
        group in the entry's list of groups.

        @param new_member: the member that shall be added to this
        L{ClansSeqgroup}
        @type new_member: L{ClansEntry} instance

        @raise TypeError: if C{new_member} is no L{ClansEntry} instance
        @raise ValueError: if C{new_member} is already contained in this
        L{ClansSeqgroup}
        """
        if not isinstance(new_member, ClansEntry):
            raise TypeError(
                'only ClansEntry instances can be added as group members')

        if new_member in self.members:
            raise ValueError(
                'entry {0.name} is already contained in this seqgroup'.format(
                    new_member))

        self.members.append(new_member)
        new_member.groups.append(self)

    def remove(self, member):
        """
        Removes L{ClansEntry} C{member} from this group and the group from
        the entry's list of groups.

        @param member: the member to be removed
        @type member: a L{ClansEntry} instance

        @raise TypeError: if C{member} is no L{ClansEntry} instance
        @raise ValueError: if C{member} is not part of this L{ClansSeqgroup}
        """
        if not isinstance(member, ClansEntry):
            raise TypeError('argument must be a ClansEntry instance')

        if member not in self.members:
            raise ValueError(
                '"{0.name}" is not a member of this seqgroup'.format(member))

        self.members.remove(member)
        member.groups.remove(self)

    def output_string(self):
        """
        Creates the CLANS <seqgroup> block format representation of the
        group. The members are listed by their ids in ascending order.

        @rtype: str
        @return: the group's representation in CLANS <seqgroup> block format
        """
        member_ids = [member.get_id() for member in self.members]
        member_ids.sort()

        # every id is followed by ';' -- an empty group yields a single ';'
        numbers = ';'.join(map(str, member_ids)) + ';'

        template = ('name={0.name}\ntype={0.type}\nsize={0.size}\nhide={1}'
                    '\ncolor={2}\nnumbers={3}')
        return template.format(
            self, int(self.hide), self.color.to_clans_color(), numbers)
2126