Coverage for /home/deng/Projects/ete4/hackathon/ete4/ete4/evol/model.py: 14%
152 statements
« prev ^ index » next coverage.py v7.2.7, created at 2024-03-21 09:19 +0100
« prev ^ index » next coverage.py v7.2.7, created at 2024-03-21 09:19 +0100
1#!/usr/bin/python3
3"""
4This module defines the evolutionary Model that can be linked
5to phylogeny, and computed by one of codeml, gerp, slr.
6"""
8from re import sub
9from warnings import warn
11from ..evol.control import PARAMS, AVAIL
12from ..evol.parser import parse_paml, parse_rst, get_ancestor, parse_slr
13try:
14 from ..treeview.faces import SequencePlotFace
15except ImportError:
16 TREEVIEW = False
17else:
18 TREEVIEW = True
20class Model:
21 '''Evolutionary model.
22 "omega" stands for starting value of omega, in the computation. As
23 Zihen Yang says, it is good to try with different starting values...
24 model linked to tree by _tree variable
25 results of calculation are stored in dictionaries:
26 * branches: w dN dS bL by mean of their node_id
27 * sites : values at each site.
28 * classes : classes of sites and proportions
29 * stats : lnL number of parameters kappa value and codon frequencies stored here.
31 available models are:
32 =========== ============================= ==================
33 Model name Description Model kind
34 =========== ============================= ==================\n%s
35 =========== ============================= ==================\n
37 :argument model_name: string with model name. Add a dot followed by anything at the end of the string in order to extend the name of the model and avoid overwriting.
38 :argument None tree: a Tree object
39 :argument None path: path to outfile, were model computation output can be found.
41 '''
42 def __init__(self, model_name, tree=None, path=None, **kwargs):
43 self._tree = tree
44 self.name, args = check_name(model_name)
45 self.sites = None
46 self.classes = None
47 self.n_classes = None
48 self.branches = {}
49 self.stats = {}
50 self.properties = {}
51 for a, b in list(args.items()):
52 self.properties [a] = b
53 params = dict(list(PARAMS.items()))
54 self._change_params(params)
55 for key, arg in list(kwargs.items()):
56 if key not in params:
57 warn('WARNING: unknown param %s, can cause problems...'% (key))
58 if key == 'gappy':
59 arg = not arg
60 params[key] = arg
61 self.__check_marks()
62 if path:
63 self._load(path)
65 def __str__(self):
66 '''
67 to print nice info
68 '''
69 str_mark = ''
70 str_line = '\n mark:%-5s, omega: %-10s, node_ids: %-4s, name: %s'
71 for i, node in enumerate(self._tree.traverse()):
72 if node.is_root:
73 str_mark += str_line % (self.branches[node.props.get('node_id')]['mark'],
74 'None',
75 node.props.get('node_id'), node.name or 'ROOT')
76 else:
77 str_mark += str_line % (self.branches[node.props.get('node_id')]['mark'],
78 self.branches[node.props.get('node_id')].get('w',
79 'None'),
80 node.props.get('node_id'), node.name or 'EDGE')
81 str_site = ''
82 str_line = '\n %-12s: %s '
83 if self.classes:
84 for t in [t for t in sorted(self.classes)]:
85 str_site += str_line % (t, ' '.join(['%s%s=%-9s' % (t[0], j, i)\
86 for j, i in \
87 enumerate(self.classes[t])]
88 ))
89 return ''' Evolutionary Model %s:
90 log likelihood : %s
91 number of parameters : %s
92 sites inference : %s
93 sites classes : %s
94 branches : %s
95 ''' % (self.name,
96 self.lnL if 'lnL' in self.stats else 'None',
97 self.np if 'np' in self.stats else 'None',
98 ', '.join(sorted(list(self.sites.keys()))) if self.sites else 'None',
99 str_site if self.classes else 'None',
100 str_mark if self.branches else 'None'
101 )
104 def __check_marks(self):
105 """
106 checks if tree is marked and if model allows marks.
107 fill up branches dict with marks
108 """
109 has_mark = any(n.props.get('mark') for n in self._tree.descendants())
110 for i, node in enumerate(self._tree.traverse()):
111 if has_mark and self.properties['allow_mark']:
112 self.branches[node.props.get('node_id')] = {'mark': node.props.get('mark') or ' #0'}
113 elif 'branch' in self.properties['typ']:
114 self.branches[node.props.get('node_id')] = {'mark': ' #'+str(i)}
115 else:
116 self.branches[node.props.get('node_id')] = {'mark': ''}
118 def _load(self, path):
119 '''
120 parse outfiles and load in model object
121 '''
122 if self.properties['exec'] == 'codeml':
123 parse_paml(path, self)
124 # parse rst file if site or branch-site model
125 if 'site' in self.properties['typ']:
126 # sites and classes attr
127 for key, val in parse_rst(path).items():
128 setattr(self, key, val)
129 if 'ancestor' in self.properties['typ']:
130 get_ancestor(path, self)
131 vars(self) ['lnL'] = self.stats ['lnL']
132 vars(self) ['np'] = self.stats ['np']
133 elif self.properties['exec'] == 'Slr':
134 for key, val in parse_slr(path).items():
135 setattr (self, key, val)
136 vars(self) ['lnL'] = 0
137 vars(self) ['np'] = 0
139 def _change_params(self, params):
140 '''
141 change model specific values
142 '''
143 for key, change in self.properties ['changes']:
144 params[key] = change
145 self.properties ['params'] = params
147 def set_histface(self, up=True, hlines=(1.0, 0.3), kind='bar',
148 errors=False, colors=None, **kwargs):
149 """
150 To add histogram face for a given site mdl (M1, M2, M7, M8)
151 can choose to put it up or down the tree.
152 2 types are available:
153 * stick: to draw histogram.
154 * curve: to draw plot.
155 You can define color scheme by passing a diccionary, default is:
156 col = {'NS' : 'grey' ,
157 'RX' : 'green' ,
158 'RX+': 'green' ,
159 'CN' : 'cyan' ,
160 'CN+': 'blue' ,
161 'PS' : 'orange',
162 'PS+': 'red' }
163 """
164 if self.sites is None:
165 warn("WARNING: model %s not computed." % (self.name))
166 return None
167 if not 'header' in kwargs:
168 kwargs['header'] = 'Omega value for sites under %s model' % \
169 (self.name)
170 if 'BEB' in self.sites:
171 val = 'BEB'
172 elif 'NEB' in self.sites:
173 val = 'NEB'
174 else:
175 val = 'SLR'
176 colors = self.colorize_rst(val, col=colors)
177 if not 'ylim' in kwargs:
178 kwargs['ylim'] = (0, 2)
179 if errors:
180 errors = self.sites[val].get('se', None)
181 if TREEVIEW:
182 try:
183 hist = SequencePlotFace(self.sites[val]['w'], hlines=hlines,
184 colors=colors, errors=errors,
185 ylabel=u'Omega (\u03c9)', kind=kind,
186 **kwargs)
187 except KeyError:
188 raise Exception('ERROR: no sites to display, only available ' +
189 'histfaces for site models\n')
190 if up:
191 setattr(hist, 'up', True)
192 else:
193 setattr(hist, 'up', False)
194 else:
195 hist = None
196 self.properties['histface'] = hist
199 def get_ctrl_string(self, outfile=None):
200 '''
201 generate ctrl string to write to a file, if file is given,
202 write it, otherwise returns the string
204 :argument None outfile: if a path is given here, write control string into it.
206 :returns: the control string
208 '''
209 string = ''
210 if 'sep' in self.properties:
211 sep = self.properties ['sep']
212 else:
213 sep = ' = '
214 for prm in ['seqfile', 'treefile', 'outfile']:
215 string += '%15s%s%s\n' % (prm, sep,
216 str(self.properties['params'][prm]))
217 string += '\n'
218 for prm in sorted(list(self.properties ['params'].keys()), key=lambda x:
219 sub('fix_', '', x.lower())):
220 if prm in ['seqfile', 'treefile', 'outfile']:
221 continue
222 if str(self.properties ['params'][prm]).startswith('*'):
223 continue
224 #string += ' *'+'%13s = %s\n' \
225 # % (p, str(self.properties ['params'][p])[1:])
226 else:
227 string += '%15s%s%s\n' % (prm, sep,
228 str(self.properties ['params'][prm]))
229 if outfile is None:
230 return string
231 else:
232 open(outfile, 'w').write(string)
234 def colorize_rst(self, val, col=None):
235 '''
236 Colorize function, that take in argument a list of values
237 corresponding to a list of classes and returns a list of
238 colors to paint histogram.
240 :param val: type of estimation, can be BEB or NEB (only
241 positive-selection models have BEB)
242 :param None col: a dictionary of colors that by default is:
243 {"NS" : "grey",
244 "RX" : "green",
245 "RX+": "green",
246 "CN" : "cyan",
247 "CN+": "blue",
248 "PS" : "orange",
249 "PS+": "red"}
251 :returns: a list of colors dependending categories of sites that are among:
252 - CN+ > 0.99 probabylity of beloging to conserved class of site
253 - CN > 0.95 probabylity of beloging to conserved class of site
254 - NS not significant
255 - RX+ > 0.99 probabylity of beloging to relaxed class of site
256 - RX > 0.95 probabylity of beloging to relaxed class of site
257 - PS+ > 0.99 probabylity of beloging to positively-selected class of site
258 - PS > 0.95 probabylity of beloging to positively-selected class of site
259 '''
260 col = col or {'NS' : 'grey',
261 'RX' : 'green',
262 'RX+': 'green',
263 'CN' : 'cyan',
264 'CN+': 'blue',
265 'PS' : 'orange',
266 'PS+': 'red'}
267 if not 'site' in self.properties['typ']:
268 raise Exception('ERROR: histogram are only for site and '
269 'branch-site models.')
270 categories = self.significance_by_site(val)
271 return [col[cat] for cat in categories]
273 def significance_by_site(self, val):
274 '''
275 Summarize significance of site models.
277 :param val: type of estimation, can be BEB or NEB (only
278 positive-selection models have BEB)
280 :returns: a list of categories among:
281 - CN+ > 0.99 probabylity of beloging to conserved class of site
282 - CN > 0.95 probabylity of beloging to conserved class of site
283 - NS not significant
284 - RX+ > 0.99 probabylity of beloging to relaxed class of site
285 - RX > 0.95 probabylity of beloging to relaxed class of site
286 - PS+ > 0.99 probabylity of beloging to positively-selected class of site
287 - PS > 0.95 probabylity of beloging to positively-selected class of site
288 '''
289 if not 'site' in self.properties['typ']:
290 raise Exception('ERROR: only for site and '
291 'branch-site models.')
292 ps_model = 'positive' in self.properties['evol']
293 categories = []
294 for pval, curr_class in zip(self.sites[val]['pv'],
295 self.sites[val]['class']):
296 if pval < 0.95:
297 categories.append('NS')
298 elif curr_class == self.n_classes[val] and not ps_model:
299 if pval < 0.99:
300 categories.append('RX')
301 else:
302 categories.append('RX+')
303 elif curr_class == 1:
304 if pval < 0.99:
305 categories.append('CN')
306 else:
307 categories.append('CN+')
308 elif curr_class >= self.n_classes[val] and ps_model:
309 if pval < 0.99:
310 categories.append('PS')
311 else:
312 categories.append('PS+')
313 elif curr_class == self.n_classes[val]:
314 if pval < 0.99:
315 categories.append('RX')
316 else:
317 categories.append('RX+')
318 else:
319 categories.append('NS')
320 return categories
323def check_name(model):
324 '''
325 check that model name corresponds to one of the available
326 '''
327 if sub(r'\..*', '', model) in AVAIL:
328 return model, AVAIL [sub(r'\..*', '', model)]
332Model.__doc__ = Model.__doc__ % \
333 ('\n'.join([ ' %-8s %-27s %-15s ' % \
334 ('%s' % (x), AVAIL[x]['evol'], AVAIL[x]['typ']) \
335 for x in sorted(sorted(AVAIL.keys()),key=lambda x: \
336 AVAIL[x]['typ'],
337 reverse=True)]))