Coverage for /Users/gavin/repos/EnsemblLite/src/ensembl_lite/_config.py: 94%

157 statements  

« prev     ^ index     » next       coverage.py v7.2.3, created at 2024-03-25 13:40 +1100

1import configparser 

2import fnmatch 

3import pathlib 

4 

5from dataclasses import dataclass 

6from typing import Iterable 

7 

8from ensembl_lite._species import Species, species_from_ensembl_tree 

9 

10 

# File names of the ini files written into the install and staging directories.
INSTALLED_CONFIG_NAME = "installed.cfg"
DOWNLOADED_CONFIG_NAME = "downloaded.cfg"

# Directory names used to build the staging / install sub-paths.
_GENOMES_NAME: str = "genomes"
_COMPARA_NAME: str = "compara"
_ALIGNS_NAME: str = "aligns"  # joined beneath _COMPARA_NAME
_HOMOLOGIES_NAME: str = "homologies"  # joined beneath _COMPARA_NAME

18 

19 

@dataclass
class Config:
    """Settings describing what to fetch from a remote host and where to put it.

    ``read_config`` builds instances from an ini file; ``to_dict`` and
    ``write`` serialise an instance back to the same ini layout.
    """

    # remote host and the path on it (the "remote path" ini section)
    host: str
    remote_path: str
    # value of the "release" ini section
    release: str
    # local directories (the "local path" ini section)
    staging_path: pathlib.Path
    install_path: pathlib.Path
    # species name -> list of db names requested for that species
    species_dbs: dict[str, list[str]]
    # names listed in the "compara" ini section
    align_names: Iterable[str]
    tree_names: Iterable[str]

    def update_species(self, species: dict[str, list[str]]):
        """Merge ``species`` into ``species_dbs``.

        Raises
        ------
        ValueError
            if any key of ``species`` is not in ``Species``; nothing is
            merged in that case.
        """
        if not species:
            return
        # validate every key before touching species_dbs
        for k in species:
            if k not in Species:
                raise ValueError(f"Unknown species {k}")
        self.species_dbs |= species

    @property
    def db_names(self) -> Iterable[str]:
        """Yields the Ensembl db prefix of each species in ``species_dbs``."""
        for species in self.species_dbs:
            yield Species.get_ensembl_db_prefix(species)

    @property
    def staging_genomes(self):
        """directory for genomes under ``staging_path``"""
        return self.staging_path / _GENOMES_NAME

    @property
    def install_genomes(self):
        """directory for genomes under ``install_path``"""
        return self.install_path / _GENOMES_NAME

    @property
    def staging_homologies(self):
        """directory for compara homologies under ``staging_path``"""
        return self.staging_path / _COMPARA_NAME / _HOMOLOGIES_NAME

    @property
    def install_homologies(self):
        """directory for compara homologies under ``install_path``"""
        return self.install_path / _COMPARA_NAME / _HOMOLOGIES_NAME

    @property
    def staging_aligns(self):
        """directory for compara alignments under ``staging_path``"""
        return self.staging_path / _COMPARA_NAME / _ALIGNS_NAME

    @property
    def install_aligns(self):
        """directory for compara alignments under ``install_path``"""
        return self.install_path / _COMPARA_NAME / _ALIGNS_NAME

    def to_dict(self):
        """returns cfg as a dict

        The result maps ini section names to ``{option: value}`` dicts.
        Alignment and tree names are joined with "," which is the separator
        ``read_config`` splits them on, so multiple names survive a
        write / read round trip.
        """
        # NOTE(review): db_names is a generator, which is always truthy, so
        # this guard cannot currently fire. It is left as-is because making
        # it effective would reject configs without species that serialise
        # successfully today -- confirm the intended behaviour.
        if not self.db_names:
            raise ValueError("no db names")

        data = {
            "remote path": {"path": str(self.remote_path), "host": str(self.host)},
            "local path": {
                "staging_path": str(self.staging_path),
                "install_path": str(self.install_path),
            },
            "release": {"release": self.release},
        }

        if self.align_names or self.tree_names:
            data["compara"] = {}

        if self.align_names:
            data["compara"]["align_names"] = ",".join(self.align_names)
        if self.tree_names:
            data["compara"]["tree_names"] = ",".join(self.tree_names)

        # one section per species db prefix
        for db_name in self.db_names:
            data[db_name] = {"db": "core"}

        return data

    def write(self):
        """writes a ini to staging_path/DOWNLOADED_CONFIG_NAME

        ``staging_path`` is created if it does not exist.
        """
        parser = configparser.ConfigParser()
        cfg = self.to_dict()
        for section, settings in cfg.items():
            parser.add_section(section)
            for option, val in settings.items():
                parser.set(section, option=option, value=val)
        self.staging_path.mkdir(parents=True, exist_ok=True)
        with (self.staging_path / DOWNLOADED_CONFIG_NAME).open(mode="w") as out:
            parser.write(out, space_around_delimiters=True)

106 

107 

108@dataclass 

109class InstalledConfig: 

110 release: str 

111 install_path: pathlib.Path 

112 

113 def __hash__(self): 

114 return id(self) 

115 

116 def __post_init__(self): 

117 self.install_path = pathlib.Path(self.install_path) 

118 

119 @property 

120 def compara_path(self): 

121 return self.install_path / _COMPARA_NAME 

122 

123 @property 

124 def homologies_path(self): 

125 return self.compara_path / _HOMOLOGIES_NAME 

126 

127 @property 

128 def aligns_path(self): 

129 return self.compara_path / _ALIGNS_NAME 

130 

131 @property 

132 def genomes_path(self): 

133 return self.install_path / _GENOMES_NAME 

134 

135 def installed_genome(self, species: str) -> pathlib.Path: 

136 db_name = Species.get_ensembl_db_prefix(species) 

137 return self.genomes_path / db_name 

138 

139 def list_genomes(self): 

140 """returns list of installed genomes""" 

141 return [p.name for p in self.genomes_path.glob("*") if p.name in Species] 

142 

143 def path_to_alignment(self, pattern: str) -> pathlib.Path | None: 

144 """returns the full path to alignment matching the name 

145 

146 Parameters 

147 ---------- 

148 pattern 

149 glob pattern for the Ensembl alignment name 

150 """ 

151 align_dirs = [ 

152 d for d in self.aligns_path.glob("*") if fnmatch.fnmatch(d.name, pattern) 

153 ] 

154 if not align_dirs: 

155 return None 

156 

157 if len(align_dirs) > 1: 

158 raise ValueError( 

159 f"{pattern!r} matches too many directories in {self.aligns_path}" 

160 ) 

161 

162 return align_dirs[0] 

163 

164 

def write_installed_cfg(config: Config) -> pathlib.Path:
    """writes an ini file recording the release under config.install_path

    The file is named INSTALLED_CONFIG_NAME and holds a single "release"
    section. Missing parent directories are created.

    Returns
    -------
    path of the written file
    """
    ini = configparser.ConfigParser()
    ini.add_section("release")
    ini.set("release", option="release", value=config.release)

    destination = config.install_path / INSTALLED_CONFIG_NAME
    destination.parent.mkdir(parents=True, exist_ok=True)
    with destination.open(mode="w") as handle:
        ini.write(handle)

    return destination

176 

177 

def read_installed_cfg(path: pathlib.Path) -> InstalledConfig:
    """reads the installed ini file and returns an InstalledConfig

    Parameters
    ----------
    path
        either the ini file itself (named INSTALLED_CONFIG_NAME) or the
        directory that contains it

    Raises
    ------
    SystemExit
        with status 1, after printing a message, if the file does not exist
    """
    if path.name != INSTALLED_CONFIG_NAME:
        # a directory was given, the file lives directly inside it
        path = path / INSTALLED_CONFIG_NAME

    if not path.exists():
        print(f"{str(path)} does not exist, exiting")
        # raise directly rather than calling exit(): that helper is injected
        # by the site module and is not guaranteed to exist (e.g. python -S)
        raise SystemExit(1)

    parser = configparser.ConfigParser()
    parser.read(path)
    release = parser.get("release", "release")
    return InstalledConfig(release=release, install_path=path.parent)

191 

192 

def _split_names(value: str | None) -> list[str]:
    """Splits a comma separated option value into stripped, non-empty names."""
    if value is None:
        return []
    # dropping empty items means a blank value or a trailing comma does not
    # produce a name of ""
    return [name for name in (part.strip() for part in value.split(",")) if name]


def read_config(config_path) -> Config:
    """returns ensembl release, local path, and db specifics from the provided
    config path

    Parameters
    ----------
    config_path
        path object of the ini file; a leading "~" is expanded

    Notes
    -----
    Sections other than "release", "remote path", "local path" and "compara"
    are treated as species and must provide a "db" option. If the "compara"
    section lists tree names, each tree is downloaded and the species it
    contains are added to the result.
    """
    # imported here rather than at module level, as in the original code
    from ensembl_lite._download import download_ensembl_tree

    parser = configparser.ConfigParser()

    with config_path.expanduser().open() as f:
        parser.read_file(f)

    release = parser.get("release", "release")
    host = parser.get("remote path", "host")
    # drop a single trailing "/" from the remote path
    remote_path = parser.get("remote path", "path").removesuffix("/")
    staging_path = (
        pathlib.Path(parser.get("local path", "staging_path")).expanduser().absolute()
    )
    install_path = (
        pathlib.Path(parser.get("local path", "install_path")).expanduser().absolute()
    )

    species_dbs = {}
    align_names = []
    tree_names = []
    for section in parser.sections():
        if section in ("release", "remote path", "local path"):
            continue

        if section == "compara":
            align_names = _split_names(parser.get(section, "align_names", fallback=None))
            tree_names = _split_names(parser.get(section, "tree_names", fallback=None))
            continue

        dbs = [db.strip() for db in parser.get(section, "db").split(",")]

        # handle synonyms
        species = Species.get_species_name(section, level="raise")
        species_dbs[species] = dbs

    # add all species in each tree to species_dbs
    for tree_name in tree_names:
        tree = download_ensembl_tree(host, remote_path, release, tree_name)
        species_dbs.update(species_from_ensembl_tree(tree))

    return Config(
        host=host,
        remote_path=remote_path,
        release=release,
        staging_path=staging_path,
        install_path=install_path,
        species_dbs=species_dbs,
        align_names=align_names,
        tree_names=tree_names,
    )