Coverage for /Users/gavin/repos/EnsemblLite/src/ensembl_lite/_config.py: 93%
189 statements
« prev ^ index » next coverage.py v7.5.1, created at 2024-06-12 16:32 -0400
« prev ^ index » next coverage.py v7.5.1, created at 2024-06-12 16:32 -0400
1import configparser
2import fnmatch
3import pathlib
4import typing
6from dataclasses import dataclass
7from typing import Iterable
9import click
11from ensembl_lite._species import Species, species_from_ensembl_tree
12from ensembl_lite._util import PathType
# name of the ini file written beneath a config's install_path
INSTALLED_CONFIG_NAME = "installed.cfg"
# name of the ini file written beneath a config's staging_path
DOWNLOADED_CONFIG_NAME = "downloaded.cfg"

# directory names used to build the sub-directories of the staging and
# install paths (compara/aligns, compara/homologies, genomes)
_COMPARA_NAME: str = "compara"
_ALIGNS_NAME: str = "aligns"
_HOMOLOGIES_NAME: str = "homologies"
_GENOMES_NAME: str = "genomes"
def make_relative_to(
    staging_path: pathlib.Path, install_path: pathlib.Path
) -> pathlib.Path:
    """returns install_path expressed relative to staging_path

    Parameters
    ----------
    staging_path
        absolute path of the directory the result is relative to
    install_path
        absolute path to express relative to staging_path

    Returns
    -------
    The shortest lexical relative path, i.e. one ".." for every component
    of staging_path that is not shared with install_path, followed by the
    unshared components of install_path. Identical paths give ".".

    Notes
    -----
    The computation is purely lexical, the paths do not need to exist and
    symlinks are not resolved.
    """
    assert staging_path.is_absolute() and install_path.is_absolute()

    # Count the leading components common to both paths. Counting (rather
    # than re-using a loop index) also gives the right answer when the loop
    # finishes without finding a difference, i.e. when one path contains
    # the other or they are identical.
    num_shared = 0
    for s_part, i_part in zip(staging_path.parts, install_path.parts):
        if s_part != i_part:
            break
        num_shared += 1

    change_up = ("..",) * (len(staging_path.parts) - num_shared)
    rel_path = change_up + install_path.parts[num_shared:]
    return pathlib.Path(*rel_path)
@dataclass
class Config:
    """Configuration holding the remote location, release, local paths and
    the species / compara data that were selected.

    Notes
    -----
    staging_path and install_path are converted to pathlib.Path after
    initialisation, so strings are accepted for them.
    """

    # remote server and the path on it, written to the "remote path" section
    host: str
    remote_path: str
    # written to the "release" section
    release: str
    # local root directories, see the staging_* / install_* properties for
    # the sub-directories derived from them
    staging_path: pathlib.Path
    install_path: pathlib.Path
    # keys are species names (validated against Species by update_species)
    species_dbs: dict[str, list[str]]
    # names written to the "compara" section
    align_names: Iterable[str]
    tree_names: Iterable[str]
    # whether the "homologies" option is written to the "compara" section
    homologies: bool

    def __post_init__(self):
        # normalise so the path properties can always use the / operator
        self.staging_path = pathlib.Path(self.staging_path)
        self.install_path = pathlib.Path(self.install_path)

    def update_species(self, species: dict[str, list[str]]):
        """merges species into species_dbs

        Parameters
        ----------
        species
            maps species name to db names, an empty value is a no-op

        Raises
        ------
        ValueError if a key is not in Species. Nothing is merged in that
        case.
        """
        if not species:
            return
        for k in species:
            if k not in Species:
                raise ValueError(f"Unknown species {k}")
        self.species_dbs |= species

    @property
    def db_names(self) -> Iterable[str]:
        """yields the Ensembl db prefix of every species in species_dbs"""
        for species in self.species_dbs:
            yield Species.get_ensembl_db_prefix(species)

    @property
    def staging_genomes(self) -> pathlib.Path:
        """genomes directory beneath staging_path"""
        return self.staging_path / _GENOMES_NAME

    @property
    def install_genomes(self) -> pathlib.Path:
        """genomes directory beneath install_path"""
        return self.install_path / _GENOMES_NAME

    @property
    def staging_homologies(self) -> pathlib.Path:
        """compara homologies directory beneath staging_path"""
        return self.staging_path / _COMPARA_NAME / _HOMOLOGIES_NAME

    @property
    def install_homologies(self) -> pathlib.Path:
        """compara homologies directory beneath install_path"""
        return self.install_path / _COMPARA_NAME / _HOMOLOGIES_NAME

    @property
    def staging_aligns(self) -> pathlib.Path:
        """compara alignments directory beneath staging_path"""
        return self.staging_path / _COMPARA_NAME / _ALIGNS_NAME

    @property
    def install_aligns(self) -> pathlib.Path:
        """compara alignments directory beneath install_path"""
        return self.install_path / _COMPARA_NAME / _ALIGNS_NAME

    def to_dict(self, relative_paths: bool = True) -> dict[str, dict[str, str]]:
        """returns cfg as a dict of {section: {option: value}}

        Parameters
        ----------
        relative_paths
            if True, staging_path is recorded as "." and install_path is
            recorded relative to staging_path, otherwise both paths are
            recorded as they are

        Notes
        -----
        A "compara" section is only included when at least one of
        align_names, tree_names or homologies is set. Every db name gets
        its own section with the single option db = core.
        """
        # NOTE(review): db_names is a generator, which is always truthy, so
        # this guard never fires. It is left unchanged because enabling it
        # would start rejecting configs that have no species -- confirm the
        # intent before making it effective.
        if not self.db_names:
            raise ValueError("no db names")

        if relative_paths:
            staging_path = "."
            install_path = str(make_relative_to(self.staging_path, self.install_path))
        else:
            staging_path = str(self.staging_path)
            install_path = str(self.install_path)

        data = {
            "remote path": {"path": str(self.remote_path), "host": str(self.host)},
            "local path": {
                "staging_path": staging_path,
                "install_path": install_path,
            },
            "release": {"release": self.release},
        }

        # Build the section separately so that it exists whenever any of its
        # options is set, including homologies on its own.
        compara = {}
        if self.align_names:
            # comma separated, because read_config splits these values on ","
            compara["align_names"] = ",".join(self.align_names)
        if self.tree_names:
            compara["tree_names"] = ",".join(self.tree_names)
        if self.homologies:
            # read_config only checks that this option is present
            compara["homologies"] = ""
        if compara:
            data["compara"] = compara

        for db_name in self.db_names:
            data[db_name] = {"db": "core"}

        return data

    def write(self):
        """writes a ini to staging_path/DOWNLOADED_CONFIG_NAME

        Notes
        -----
        Updates value for staging_path to '.', and install directories to be
        relative to staging_path. The staging directory is created if it
        does not exist.
        """
        parser = configparser.ConfigParser()
        cfg = self.to_dict()
        for section, settings in cfg.items():
            parser.add_section(section)
            for option, val in settings.items():
                parser.set(section, option=option, value=val)
        self.staging_path.mkdir(parents=True, exist_ok=True)
        with (self.staging_path / DOWNLOADED_CONFIG_NAME).open(mode="w") as out:
            parser.write(out, space_around_delimiters=True)
146@dataclass
147class InstalledConfig:
148 release: str
149 install_path: pathlib.Path
151 def __hash__(self):
152 return id(self)
154 def __post_init__(self):
155 self.install_path = pathlib.Path(self.install_path)
157 @property
158 def compara_path(self) -> pathlib.Path:
159 return self.install_path / _COMPARA_NAME
161 @property
162 def homologies_path(self) -> pathlib.Path:
163 return self.compara_path / _HOMOLOGIES_NAME
165 @property
166 def aligns_path(self) -> pathlib.Path:
167 return self.compara_path / _ALIGNS_NAME
169 @property
170 def genomes_path(self) -> pathlib.Path:
171 return self.install_path / _GENOMES_NAME
173 def installed_genome(self, species: str) -> pathlib.Path:
174 db_name = Species.get_ensembl_db_prefix(species)
175 return self.genomes_path / db_name
177 def list_genomes(self):
178 """returns list of installed genomes"""
179 return [p.name for p in self.genomes_path.glob("*") if p.name in Species]
181 def path_to_alignment(self, pattern: str) -> pathlib.Path | None:
182 """returns the full path to alignment matching the name
184 Parameters
185 ----------
186 pattern
187 glob pattern for the Ensembl alignment name
188 """
189 align_dirs = [
190 d for d in self.aligns_path.glob("*") if fnmatch.fnmatch(d.name, pattern)
191 ]
192 if not align_dirs:
193 return None
195 if len(align_dirs) > 1:
196 raise ValueError(
197 f"{pattern!r} matches too many directories in {self.aligns_path}"
198 )
200 return align_dirs[0]
def write_installed_cfg(config: Config) -> PathType:
    """writes the installed ini file under config.install_path

    Notes
    -----
    Only the release is recorded, as the option "release" of the section
    "release". The install directory is created if required.

    Returns
    -------
    The path of the file that was written.
    """
    section = option = "release"
    ini = configparser.ConfigParser()
    ini.add_section(section)
    ini.set(section, option, config.release)

    cfg_path = config.install_path / INSTALLED_CONFIG_NAME
    # the install directory may not exist yet
    cfg_path.parent.mkdir(parents=True, exist_ok=True)
    with cfg_path.open(mode="w") as handle:
        ini.write(handle)

    return cfg_path
def read_installed_cfg(path: PathType) -> InstalledConfig:
    """reads the installed ini file

    Parameters
    ----------
    path
        either the ini file itself, recognised by being named
        INSTALLED_CONFIG_NAME, or the directory that contains it

    Returns
    -------
    InstalledConfig with the release read from the file and install_path
    set to the directory containing the file.

    Notes
    -----
    If the file does not exist, a message is printed and the process exits
    with status 1.
    """
    import sys

    # coerce so that a plain string also works, .name and / require a Path
    path = pathlib.Path(path)
    path = (
        path if path.name == INSTALLED_CONFIG_NAME else (path / INSTALLED_CONFIG_NAME)
    )
    if not path.exists():
        print(f"{str(path)} does not exist, exiting")
        # sys.exit rather than the exit() builtin, which is injected by the
        # site module and is absent when that module is not loaded
        sys.exit(1)

    parser = configparser.ConfigParser()
    parser.read(path)
    release = parser.get("release", "release")
    return InstalledConfig(release=release, install_path=path.parent)
def _standardise_path(path: str, config_path: pathlib.Path) -> pathlib.Path:
    """returns path as an absolute path

    Parameters
    ----------
    path
        the path to standardise, a leading "~" is expanded
    config_path
        directory that a relative path is interpreted against

    Notes
    -----
    An absolute path is returned after "~" expansion only. A relative path
    is joined onto config_path and resolved.
    """
    expanded = pathlib.Path(path).expanduser()
    if expanded.is_absolute():
        return expanded
    return (config_path / expanded).resolve()
def read_config(
    config_path: pathlib.Path, root_dir: typing.Optional[pathlib.Path] = None
) -> Config:
    """returns ensembl release, local path, and db specifics from the provided
    config path

    Parameters
    ----------
    config_path
        path to the ini file, a leading "~" is expanded
    root_dir
        directory that relative staging / install paths in the file are
        interpreted against, defaults to the directory of config_path

    Notes
    -----
    The sections "release", "remote path" and "local path" are required.
    The optional "compara" section may define comma separated align_names
    and tree_names, and a homologies option. Every other section is treated
    as a species with a comma separated "db" option.

    If the file does not exist, a message is displayed and the process
    exits with status 1.
    """
    import sys

    from ensembl_lite._download import download_ensembl_tree

    # Expand "~" before the existence check so the path that is checked is
    # the same one that is opened.
    config_path = pathlib.Path(config_path).expanduser()
    if not config_path.exists():
        click.secho(f"File not found {config_path.resolve()!s}", fg="red")
        # sys.exit rather than the exit() builtin, which is injected by the
        # site module and is absent when that module is not loaded
        sys.exit(1)

    parser = configparser.ConfigParser()

    with config_path.open() as f:
        parser.read_file(f)

    if root_dir is None:
        root_dir = config_path.parent

    release = parser.get("release", "release")
    host = parser.get("remote path", "host")
    # drop a single trailing "/" from the remote path
    remote_path = parser.get("remote path", "path").removesuffix("/")
    # paths
    staging_path = _standardise_path(parser.get("local path", "staging_path"), root_dir)
    install_path = _standardise_path(parser.get("local path", "install_path"), root_dir)

    # only the presence of the option is checked, its value is ignored
    homologies = parser.has_option("compara", "homologies")
    species_dbs = {}
    align_names = []
    tree_names = []
    for section in parser.sections():
        if section in ("release", "remote path", "local path"):
            continue

        if section == "compara":
            value = parser.get(section, "align_names", fallback=None)
            align_names = [] if value is None else [n.strip() for n in value.split(",")]
            value = parser.get(section, "tree_names", fallback=None)
            tree_names = [] if value is None else [n.strip() for n in value.split(",")]
            continue

        dbs = [db.strip() for db in parser.get(section, "db").split(",")]

        # handle synonyms
        species = Species.get_species_name(section, level="raise")
        species_dbs[species] = dbs

    # we also want homologies if we want alignments
    homologies = homologies or bool(align_names)

    # add all species in the trees to species_dbs
    # NOTE(review): update() replaces the entry of a species that also has
    # its own section -- presumably the tree derived value is equivalent,
    # confirm against species_from_ensembl_tree
    for tree_name in tree_names:
        tree = download_ensembl_tree(host, remote_path, release, tree_name)
        sp = species_from_ensembl_tree(tree)
        species_dbs.update(sp)

    return Config(
        host=host,
        remote_path=remote_path,
        release=release,
        staging_path=staging_path,
        install_path=install_path,
        species_dbs=species_dbs,
        align_names=align_names,
        tree_names=tree_names,
        homologies=homologies,
    )