Coverage for /home/deng/Projects/ete4/hackathon/ete4/ete4/evol/model.py: 14%

152 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2024-03-21 09:19 +0100

1#!/usr/bin/python3 

2 

3""" 

4This module defines the evolutionary Model that can be linked 

5to phylogeny, and computed by one of codeml, gerp, slr. 

6""" 

7 

8from re import sub 

9from warnings import warn 

10 

11from ..evol.control import PARAMS, AVAIL 

12from ..evol.parser import parse_paml, parse_rst, get_ancestor, parse_slr 

13try: 

14 from ..treeview.faces import SequencePlotFace 

15except ImportError: 

16 TREEVIEW = False 

17else: 

18 TREEVIEW = True 

19 

20class Model: 

21 '''Evolutionary model. 

22 "omega" stands for starting value of omega, in the computation. As 

23 Zihen Yang says, it is good to try with different starting values... 

24 model linked to tree by _tree variable 

25 results of calculation are stored in dictionaries: 

26 * branches: w dN dS bL by mean of their node_id 

27 * sites : values at each site. 

28 * classes : classes of sites and proportions 

29 * stats : lnL number of parameters kappa value and codon frequencies stored here. 

30 

31 available models are: 

32 =========== ============================= ================== 

33 Model name Description Model kind 

34 =========== ============================= ==================\n%s 

35 =========== ============================= ==================\n 

36 

37 :argument model_name: string with model name. Add a dot followed by anything at the end of the string in order to extend the name of the model and avoid overwriting. 

38 :argument None tree: a Tree object 

39 :argument None path: path to outfile, were model computation output can be found. 

40 

41 ''' 

42 def __init__(self, model_name, tree=None, path=None, **kwargs): 

43 self._tree = tree 

44 self.name, args = check_name(model_name) 

45 self.sites = None 

46 self.classes = None 

47 self.n_classes = None 

48 self.branches = {} 

49 self.stats = {} 

50 self.properties = {} 

51 for a, b in list(args.items()): 

52 self.properties [a] = b 

53 params = dict(list(PARAMS.items())) 

54 self._change_params(params) 

55 for key, arg in list(kwargs.items()): 

56 if key not in params: 

57 warn('WARNING: unknown param %s, can cause problems...'% (key)) 

58 if key == 'gappy': 

59 arg = not arg 

60 params[key] = arg 

61 self.__check_marks() 

62 if path: 

63 self._load(path) 

64 

65 def __str__(self): 

66 ''' 

67 to print nice info 

68 ''' 

69 str_mark = '' 

70 str_line = '\n mark:%-5s, omega: %-10s, node_ids: %-4s, name: %s' 

71 for i, node in enumerate(self._tree.traverse()): 

72 if node.is_root: 

73 str_mark += str_line % (self.branches[node.props.get('node_id')]['mark'], 

74 'None', 

75 node.props.get('node_id'), node.name or 'ROOT') 

76 else: 

77 str_mark += str_line % (self.branches[node.props.get('node_id')]['mark'], 

78 self.branches[node.props.get('node_id')].get('w', 

79 'None'), 

80 node.props.get('node_id'), node.name or 'EDGE') 

81 str_site = '' 

82 str_line = '\n %-12s: %s ' 

83 if self.classes: 

84 for t in [t for t in sorted(self.classes)]: 

85 str_site += str_line % (t, ' '.join(['%s%s=%-9s' % (t[0], j, i)\ 

86 for j, i in \ 

87 enumerate(self.classes[t])] 

88 )) 

89 return ''' Evolutionary Model %s: 

90 log likelihood : %s 

91 number of parameters : %s 

92 sites inference : %s 

93 sites classes : %s 

94 branches : %s 

95 ''' % (self.name, 

96 self.lnL if 'lnL' in self.stats else 'None', 

97 self.np if 'np' in self.stats else 'None', 

98 ', '.join(sorted(list(self.sites.keys()))) if self.sites else 'None', 

99 str_site if self.classes else 'None', 

100 str_mark if self.branches else 'None' 

101 ) 

102 

103 

104 def __check_marks(self): 

105 """ 

106 checks if tree is marked and if model allows marks. 

107 fill up branches dict with marks 

108 """ 

109 has_mark = any(n.props.get('mark') for n in self._tree.descendants()) 

110 for i, node in enumerate(self._tree.traverse()): 

111 if has_mark and self.properties['allow_mark']: 

112 self.branches[node.props.get('node_id')] = {'mark': node.props.get('mark') or ' #0'} 

113 elif 'branch' in self.properties['typ']: 

114 self.branches[node.props.get('node_id')] = {'mark': ' #'+str(i)} 

115 else: 

116 self.branches[node.props.get('node_id')] = {'mark': ''} 

117 

118 def _load(self, path): 

119 ''' 

120 parse outfiles and load in model object 

121 ''' 

122 if self.properties['exec'] == 'codeml': 

123 parse_paml(path, self) 

124 # parse rst file if site or branch-site model 

125 if 'site' in self.properties['typ']: 

126 # sites and classes attr 

127 for key, val in parse_rst(path).items(): 

128 setattr(self, key, val) 

129 if 'ancestor' in self.properties['typ']: 

130 get_ancestor(path, self) 

131 vars(self) ['lnL'] = self.stats ['lnL'] 

132 vars(self) ['np'] = self.stats ['np'] 

133 elif self.properties['exec'] == 'Slr': 

134 for key, val in parse_slr(path).items(): 

135 setattr (self, key, val) 

136 vars(self) ['lnL'] = 0 

137 vars(self) ['np'] = 0 

138 

139 def _change_params(self, params): 

140 ''' 

141 change model specific values 

142 ''' 

143 for key, change in self.properties ['changes']: 

144 params[key] = change 

145 self.properties ['params'] = params 

146 

147 def set_histface(self, up=True, hlines=(1.0, 0.3), kind='bar', 

148 errors=False, colors=None, **kwargs): 

149 """ 

150 To add histogram face for a given site mdl (M1, M2, M7, M8) 

151 can choose to put it up or down the tree. 

152 2 types are available: 

153 * stick: to draw histogram. 

154 * curve: to draw plot. 

155 You can define color scheme by passing a diccionary, default is: 

156 col = {'NS' : 'grey' , 

157 'RX' : 'green' , 

158 'RX+': 'green' , 

159 'CN' : 'cyan' , 

160 'CN+': 'blue' , 

161 'PS' : 'orange', 

162 'PS+': 'red' } 

163 """ 

164 if self.sites is None: 

165 warn("WARNING: model %s not computed." % (self.name)) 

166 return None 

167 if not 'header' in kwargs: 

168 kwargs['header'] = 'Omega value for sites under %s model' % \ 

169 (self.name) 

170 if 'BEB' in self.sites: 

171 val = 'BEB' 

172 elif 'NEB' in self.sites: 

173 val = 'NEB' 

174 else: 

175 val = 'SLR' 

176 colors = self.colorize_rst(val, col=colors) 

177 if not 'ylim' in kwargs: 

178 kwargs['ylim'] = (0, 2) 

179 if errors: 

180 errors = self.sites[val].get('se', None) 

181 if TREEVIEW: 

182 try: 

183 hist = SequencePlotFace(self.sites[val]['w'], hlines=hlines, 

184 colors=colors, errors=errors, 

185 ylabel=u'Omega (\u03c9)', kind=kind, 

186 **kwargs) 

187 except KeyError: 

188 raise Exception('ERROR: no sites to display, only available ' + 

189 'histfaces for site models\n') 

190 if up: 

191 setattr(hist, 'up', True) 

192 else: 

193 setattr(hist, 'up', False) 

194 else: 

195 hist = None 

196 self.properties['histface'] = hist 

197 

198 

199 def get_ctrl_string(self, outfile=None): 

200 ''' 

201 generate ctrl string to write to a file, if file is given, 

202 write it, otherwise returns the string 

203 

204 :argument None outfile: if a path is given here, write control string into it. 

205 

206 :returns: the control string 

207 

208 ''' 

209 string = '' 

210 if 'sep' in self.properties: 

211 sep = self.properties ['sep'] 

212 else: 

213 sep = ' = ' 

214 for prm in ['seqfile', 'treefile', 'outfile']: 

215 string += '%15s%s%s\n' % (prm, sep, 

216 str(self.properties['params'][prm])) 

217 string += '\n' 

218 for prm in sorted(list(self.properties ['params'].keys()), key=lambda x: 

219 sub('fix_', '', x.lower())): 

220 if prm in ['seqfile', 'treefile', 'outfile']: 

221 continue 

222 if str(self.properties ['params'][prm]).startswith('*'): 

223 continue 

224 #string += ' *'+'%13s = %s\n' \ 

225 # % (p, str(self.properties ['params'][p])[1:]) 

226 else: 

227 string += '%15s%s%s\n' % (prm, sep, 

228 str(self.properties ['params'][prm])) 

229 if outfile is None: 

230 return string 

231 else: 

232 open(outfile, 'w').write(string) 

233 

234 def colorize_rst(self, val, col=None): 

235 ''' 

236 Colorize function, that take in argument a list of values 

237 corresponding to a list of classes and returns a list of 

238 colors to paint histogram. 

239 

240 :param val: type of estimation, can be BEB or NEB (only 

241 positive-selection models have BEB) 

242 :param None col: a dictionary of colors that by default is: 

243 {"NS" : "grey", 

244 "RX" : "green", 

245 "RX+": "green", 

246 "CN" : "cyan", 

247 "CN+": "blue", 

248 "PS" : "orange", 

249 "PS+": "red"} 

250 

251 :returns: a list of colors dependending categories of sites that are among: 

252 - CN+ > 0.99 probabylity of beloging to conserved class of site 

253 - CN > 0.95 probabylity of beloging to conserved class of site 

254 - NS not significant 

255 - RX+ > 0.99 probabylity of beloging to relaxed class of site 

256 - RX > 0.95 probabylity of beloging to relaxed class of site 

257 - PS+ > 0.99 probabylity of beloging to positively-selected class of site 

258 - PS > 0.95 probabylity of beloging to positively-selected class of site 

259 ''' 

260 col = col or {'NS' : 'grey', 

261 'RX' : 'green', 

262 'RX+': 'green', 

263 'CN' : 'cyan', 

264 'CN+': 'blue', 

265 'PS' : 'orange', 

266 'PS+': 'red'} 

267 if not 'site' in self.properties['typ']: 

268 raise Exception('ERROR: histogram are only for site and ' 

269 'branch-site models.') 

270 categories = self.significance_by_site(val) 

271 return [col[cat] for cat in categories] 

272 

273 def significance_by_site(self, val): 

274 ''' 

275 Summarize significance of site models. 

276 

277 :param val: type of estimation, can be BEB or NEB (only 

278 positive-selection models have BEB) 

279 

280 :returns: a list of categories among: 

281 - CN+ > 0.99 probabylity of beloging to conserved class of site 

282 - CN > 0.95 probabylity of beloging to conserved class of site 

283 - NS not significant 

284 - RX+ > 0.99 probabylity of beloging to relaxed class of site 

285 - RX > 0.95 probabylity of beloging to relaxed class of site 

286 - PS+ > 0.99 probabylity of beloging to positively-selected class of site 

287 - PS > 0.95 probabylity of beloging to positively-selected class of site 

288 ''' 

289 if not 'site' in self.properties['typ']: 

290 raise Exception('ERROR: only for site and ' 

291 'branch-site models.') 

292 ps_model = 'positive' in self.properties['evol'] 

293 categories = [] 

294 for pval, curr_class in zip(self.sites[val]['pv'], 

295 self.sites[val]['class']): 

296 if pval < 0.95: 

297 categories.append('NS') 

298 elif curr_class == self.n_classes[val] and not ps_model: 

299 if pval < 0.99: 

300 categories.append('RX') 

301 else: 

302 categories.append('RX+') 

303 elif curr_class == 1: 

304 if pval < 0.99: 

305 categories.append('CN') 

306 else: 

307 categories.append('CN+') 

308 elif curr_class >= self.n_classes[val] and ps_model: 

309 if pval < 0.99: 

310 categories.append('PS') 

311 else: 

312 categories.append('PS+') 

313 elif curr_class == self.n_classes[val]: 

314 if pval < 0.99: 

315 categories.append('RX') 

316 else: 

317 categories.append('RX+') 

318 else: 

319 categories.append('NS') 

320 return categories 

321 

322 

323def check_name(model): 

324 ''' 

325 check that model name corresponds to one of the available 

326 ''' 

327 if sub(r'\..*', '', model) in AVAIL: 

328 return model, AVAIL [sub(r'\..*', '', model)] 

329 

330 

331 

332Model.__doc__ = Model.__doc__ % \ 

333 ('\n'.join([ ' %-8s %-27s %-15s ' % \ 

334 ('%s' % (x), AVAIL[x]['evol'], AVAIL[x]['typ']) \ 

335 for x in sorted(sorted(AVAIL.keys()),key=lambda x: \ 

336 AVAIL[x]['typ'], 

337 reverse=True)]))