Coverage for /Users/gavin/repos/EnsemblLite/src/ensembl_lite/_config.py: 93%

189 statements  

« prev     ^ index     » next       coverage.py v7.5.1, created at 2024-06-12 16:32 -0400

1import configparser 

2import fnmatch 

3import pathlib 

4import typing 

5 

6from dataclasses import dataclass 

7from typing import Iterable 

8 

9import click 

10 

11from ensembl_lite._species import Species, species_from_ensembl_tree 

12from ensembl_lite._util import PathType 

13 

14 

# names of the ini files written by write_installed_cfg() and Config.write()
INSTALLED_CONFIG_NAME: str = "installed.cfg"
DOWNLOADED_CONFIG_NAME: str = "downloaded.cfg"

# directory names used to compose the staging / install sub-paths
_COMPARA_NAME: str = "compara"
_ALIGNS_NAME: str = "aligns"
_HOMOLOGIES_NAME: str = "homologies"
_GENOMES_NAME: str = "genomes"

22 

23 

def make_relative_to(
    staging_path: pathlib.Path, install_path: pathlib.Path
) -> pathlib.Path:
    """returns install_path expressed relative to staging_path

    Parameters
    ----------
    staging_path
        absolute path the result is relative to
    install_path
        absolute path to be expressed relative to staging_path

    Notes
    -----
    Both paths must be absolute, which is enforced with an assert. The
    result is one ".." per staging_path component from the divergence
    index onwards, followed by the install_path components from that same
    index. When no component differs over the compared length, the index
    of the last compared component is used.
    """
    assert staging_path.is_absolute() and install_path.is_absolute()

    staging_parts = staging_path.parts
    install_parts = install_path.parts
    num_compared = min(len(staging_parts), len(install_parts))
    # first position at which the two paths differ, falling back to the
    # final compared position when they agree throughout
    diverge = next(
        (n for n in range(num_compared) if staging_parts[n] != install_parts[n]),
        num_compared - 1,
    )
    climb = [".."] * (len(staging_parts) - diverge)
    return pathlib.Path(*climb, *install_parts[diverge:])

35 

36 

@dataclass
class Config:
    """settings describing what to fetch from a remote host and where the
    staged and installed data live locally

    staging_path and install_path are converted to pathlib.Path instances
    after initialisation.
    """

    # written as "host" in the "remote path" ini section
    host: str
    # written as "path" in the "remote path" ini section
    remote_path: str
    # written as "release" in the "release" ini section
    release: str
    staging_path: pathlib.Path
    install_path: pathlib.Path
    # species identifier -> list of db names
    species_dbs: dict[str, list[str]]
    # written comma separated to the "compara" ini section
    align_names: Iterable[str]
    tree_names: Iterable[str]
    # if True, a "homologies" option is written to the "compara" section
    homologies: bool

    def __post_init__(self):
        self.staging_path = pathlib.Path(self.staging_path)
        self.install_path = pathlib.Path(self.install_path)

    def update_species(self, species: dict[str, list[str]]):
        """merges species into species_dbs

        Raises
        ------
        ValueError if any key of species is not in Species. Nothing is
        merged in that case.
        """
        if not species:
            return
        for k in species:
            if k not in Species:
                raise ValueError(f"Unknown species {k}")
        self.species_dbs |= species

    @property
    def db_names(self) -> Iterable[str]:
        """yields Species.get_ensembl_db_prefix() for each key of species_dbs"""
        for species in self.species_dbs:
            yield Species.get_ensembl_db_prefix(species)

    @property
    def staging_genomes(self) -> pathlib.Path:
        return self.staging_path / _GENOMES_NAME

    @property
    def install_genomes(self) -> pathlib.Path:
        return self.install_path / _GENOMES_NAME

    @property
    def staging_homologies(self) -> pathlib.Path:
        return self.staging_path / _COMPARA_NAME / _HOMOLOGIES_NAME

    @property
    def install_homologies(self) -> pathlib.Path:
        return self.install_path / _COMPARA_NAME / _HOMOLOGIES_NAME

    @property
    def staging_aligns(self) -> pathlib.Path:
        return self.staging_path / _COMPARA_NAME / _ALIGNS_NAME

    @property
    def install_aligns(self) -> pathlib.Path:
        return self.install_path / _COMPARA_NAME / _ALIGNS_NAME

    def to_dict(self, relative_paths: bool = True) -> dict[str, dict[str, str]]:
        """returns cfg as a dict of ini sections, each a dict of options

        Parameters
        ----------
        relative_paths
            if True, staging_path is recorded as "." and install_path is
            recorded relative to staging_path. Otherwise both are recorded
            as given.

        Notes
        -----
        align_names and tree_names are written comma separated, matching
        the comma split applied by read_config(). A "compara" section is
        only included when there is at least one compara option to write.
        """
        # NOTE(review): db_names is a generator property, so this condition
        # is never True and the error is never raised. Left unchanged since
        # raising for an empty species_dbs would be a new failure for
        # existing callers -- confirm the intended behaviour.
        if not self.db_names:
            raise ValueError("no db names")

        if not relative_paths:
            staging_path = str(self.staging_path)
            install_path = str(self.install_path)
        else:
            staging_path = "."
            install_path = str(make_relative_to(self.staging_path, self.install_path))

        data = {
            "remote path": {"path": str(self.remote_path), "host": str(self.host)},
            "local path": {
                "staging_path": staging_path,
                "install_path": install_path,
            },
            "release": {"release": self.release},
        }

        compara = {}
        if self.align_names:
            compara["align_names"] = ",".join(self.align_names)
        if self.tree_names:
            compara["tree_names"] = ",".join(self.tree_names)
        if self.homologies:
            # the presence of the option is what matters, its value is empty
            compara["homologies"] = ""
        if compara:
            data["compara"] = compara

        for db_name in self.db_names:
            data[db_name] = {"db": "core"}

        return data

    def write(self):
        """writes a ini to staging_path/DOWNLOADED_CONFIG_NAME

        Notes
        -----
        Updates value for staging_path to '.', and install directories to be
        relative to staging_path. Creates staging_path if it does not exist.
        """
        parser = configparser.ConfigParser()
        cfg = self.to_dict()
        for section, settings in cfg.items():
            parser.add_section(section)
            for option, val in settings.items():
                parser.set(section, option=option, value=val)
        self.staging_path.mkdir(parents=True, exist_ok=True)
        with (self.staging_path / DOWNLOADED_CONFIG_NAME).open(mode="w") as out:
            parser.write(out, space_around_delimiters=True)

144 

145 

146@dataclass 

147class InstalledConfig: 

148 release: str 

149 install_path: pathlib.Path 

150 

151 def __hash__(self): 

152 return id(self) 

153 

154 def __post_init__(self): 

155 self.install_path = pathlib.Path(self.install_path) 

156 

157 @property 

158 def compara_path(self) -> pathlib.Path: 

159 return self.install_path / _COMPARA_NAME 

160 

161 @property 

162 def homologies_path(self) -> pathlib.Path: 

163 return self.compara_path / _HOMOLOGIES_NAME 

164 

165 @property 

166 def aligns_path(self) -> pathlib.Path: 

167 return self.compara_path / _ALIGNS_NAME 

168 

169 @property 

170 def genomes_path(self) -> pathlib.Path: 

171 return self.install_path / _GENOMES_NAME 

172 

173 def installed_genome(self, species: str) -> pathlib.Path: 

174 db_name = Species.get_ensembl_db_prefix(species) 

175 return self.genomes_path / db_name 

176 

177 def list_genomes(self): 

178 """returns list of installed genomes""" 

179 return [p.name for p in self.genomes_path.glob("*") if p.name in Species] 

180 

181 def path_to_alignment(self, pattern: str) -> pathlib.Path | None: 

182 """returns the full path to alignment matching the name 

183 

184 Parameters 

185 ---------- 

186 pattern 

187 glob pattern for the Ensembl alignment name 

188 """ 

189 align_dirs = [ 

190 d for d in self.aligns_path.glob("*") if fnmatch.fnmatch(d.name, pattern) 

191 ] 

192 if not align_dirs: 

193 return None 

194 

195 if len(align_dirs) > 1: 

196 raise ValueError( 

197 f"{pattern!r} matches too many directories in {self.aligns_path}" 

198 ) 

199 

200 return align_dirs[0] 

201 

202 

def write_installed_cfg(config: Config) -> PathType:
    """writes an ini file recording the release under config.install_path

    Returns
    -------
    path of the written file, config.install_path / INSTALLED_CONFIG_NAME
    """
    ini = configparser.ConfigParser()
    ini.add_section("release")
    ini.set("release", "release", config.release)
    destination = config.install_path / INSTALLED_CONFIG_NAME
    # the install directory may not exist yet
    destination.parent.mkdir(parents=True, exist_ok=True)
    with open(destination, mode="w") as handle:
        ini.write(handle)
    return destination

214 

215 

def read_installed_cfg(path: PathType) -> InstalledConfig:
    """reads the ini file describing an installation

    Parameters
    ----------
    path
        either the path to a file named INSTALLED_CONFIG_NAME, or the
        directory containing it

    Returns
    -------
    InstalledConfig with the release read from the file and install_path
    set to the directory holding the file

    Notes
    -----
    Prints a message and exits with status 1 if the file does not exist.
    """
    # coerce so a plain string works as well as a pathlib.Path
    path = pathlib.Path(path)
    if path.name != INSTALLED_CONFIG_NAME:
        path = path / INSTALLED_CONFIG_NAME

    if not path.exists():
        print(f"{str(path)} does not exist, exiting")
        # raise directly rather than relying on the exit() helper that the
        # site module installs for interactive use
        raise SystemExit(1)

    parser = configparser.ConfigParser()
    parser.read(path)
    release = parser.get("release", "release")
    return InstalledConfig(release=release, install_path=path.parent)

229 

230 

def _standardise_path(path: str, config_path: pathlib.Path) -> pathlib.Path:
    """returns path as an absolute path

    "~" is expanded first. An absolute result is returned unchanged,
    otherwise it is joined onto config_path and resolved.
    """
    expanded = pathlib.Path(path).expanduser()
    if expanded.is_absolute():
        return expanded
    return (config_path / expanded).resolve()

234 

235 

def _split_names(value: str) -> list[str]:
    """returns the stripped, non-empty, comma separated items of value"""
    return [name for name in (part.strip() for part in value.split(",")) if name]


def read_config(
    config_path: pathlib.Path, root_dir: typing.Optional[pathlib.Path] = None
) -> Config:
    """returns ensembl release, local path, and db specifics from the provided
    config path

    Parameters
    ----------
    config_path
        path to the ini file, a leading "~" is expanded
    root_dir
        directory that relative staging / install paths are resolved
        against, defaults to the directory containing config_path

    Notes
    -----
    Prints a message and exits with status 1 if config_path does not exist.
    The sections "release", "remote path" and "local path" are required.
    Every section other than those and "compara" is treated as a species
    and must have a "db" option. If align_names are specified, homologies
    is set to True. Species present in any of the named trees are added to
    species_dbs.
    """
    from ensembl_lite._download import download_ensembl_tree

    # expand "~" up front so the existence check, the file that is opened
    # and the default root_dir all refer to the same location
    config_path = config_path.expanduser()
    if not config_path.exists():
        click.secho(f"File not found {config_path.resolve()!s}", fg="red")
        # raise directly rather than relying on the exit() helper that the
        # site module installs for interactive use
        raise SystemExit(1)

    parser = configparser.ConfigParser()

    with config_path.open() as f:
        parser.read_file(f)

    if root_dir is None:
        root_dir = config_path.parent

    release = parser.get("release", "release")
    host = parser.get("remote path", "host")
    remote_path = parser.get("remote path", "path")
    remote_path = remote_path[:-1] if remote_path.endswith("/") else remote_path
    # paths
    staging_path = _standardise_path(parser.get("local path", "staging_path"), root_dir)
    install_path = _standardise_path(parser.get("local path", "install_path"), root_dir)

    homologies = parser.has_option("compara", "homologies")
    species_dbs = {}
    align_names = []
    tree_names = []
    for section in parser.sections():
        if section in ("release", "remote path", "local path"):
            continue

        if section == "compara":
            # a missing or empty option gives an empty list
            align_names = _split_names(parser.get(section, "align_names", fallback=""))
            tree_names = _split_names(parser.get(section, "tree_names", fallback=""))
            continue

        dbs = [db.strip() for db in parser.get(section, "db").split(",")]

        # handle synonyms
        species = Species.get_species_name(section, level="raise")
        species_dbs[species] = dbs

    # we also want homologies if we want alignments
    homologies = homologies or bool(align_names)

    # add all species in the trees to species_dbs
    for tree_name in tree_names:
        tree = download_ensembl_tree(host, remote_path, release, tree_name)
        sp = species_from_ensembl_tree(tree)
        species_dbs.update(sp)

    return Config(
        host=host,
        remote_path=remote_path,
        release=release,
        staging_path=staging_path,
        install_path=install_path,
        species_dbs=species_dbs,
        align_names=align_names,
        tree_names=tree_names,
        homologies=homologies,
    )