Coverage for /Users/gavin/repos/EnsemblLite/src/ensembl_lite/_config.py: 94%
157 statements
« prev ^ index » next coverage.py v7.2.3, created at 2024-03-25 13:40 +1100
« prev ^ index » next coverage.py v7.2.3, created at 2024-03-25 13:40 +1100
1import configparser
2import fnmatch
3import pathlib
5from dataclasses import dataclass
6from typing import Iterable
8from ensembl_lite._species import Species, species_from_ensembl_tree
# file names of the ini files written by this module
DOWNLOADED_CONFIG_NAME = "downloaded.cfg"
INSTALLED_CONFIG_NAME = "installed.cfg"

# directory names used to build paths beneath the staging / install directories
_GENOMES_NAME: str = "genomes"
_COMPARA_NAME: str = "compara"
_ALIGNS_NAME: str = "aligns"
_HOMOLOGIES_NAME: str = "homologies"
@dataclass
class Config:
    """Settings describing what to fetch from a remote host and where to put it."""

    # remote server and the path on that server
    host: str
    remote_path: str
    release: str
    # local directories; write() stores the ini file under staging_path
    staging_path: pathlib.Path
    install_path: pathlib.Path
    # species name -> list of db names; keys are validated by update_species()
    species_dbs: dict[str, list[str]]
    # names written to the "compara" section by to_dict()
    align_names: Iterable[str]
    tree_names: Iterable[str]

    def update_species(self, species: dict[str, list[str]]):
        """Merge ``species`` into ``species_dbs``.

        Parameters
        ----------
        species
            maps species name to a list of db names; an empty / None value
            is ignored

        Raises
        ------
        ValueError
            if any key is not in ``Species``; nothing is merged in that case
        """
        if not species:
            return
        for k in species:
            if k not in Species:
                raise ValueError(f"Unknown species {k}")
        self.species_dbs |= species

    @property
    def db_names(self) -> Iterable[str]:
        """yields the Ensembl db prefix of every species in ``species_dbs``"""
        for species in self.species_dbs:
            yield Species.get_ensembl_db_prefix(species)

    @property
    def staging_genomes(self):
        """genomes directory under ``staging_path``"""
        return self.staging_path / _GENOMES_NAME

    @property
    def install_genomes(self):
        """genomes directory under ``install_path``"""
        return self.install_path / _GENOMES_NAME

    @property
    def staging_homologies(self):
        """homologies directory under ``staging_path``"""
        return self.staging_path / _COMPARA_NAME / _HOMOLOGIES_NAME

    @property
    def install_homologies(self):
        """homologies directory under ``install_path``"""
        return self.install_path / _COMPARA_NAME / _HOMOLOGIES_NAME

    @property
    def staging_aligns(self):
        """alignments directory under ``staging_path``"""
        return self.staging_path / _COMPARA_NAME / _ALIGNS_NAME

    @property
    def install_aligns(self):
        """alignments directory under ``install_path``"""
        return self.install_path / _COMPARA_NAME / _ALIGNS_NAME

    def to_dict(self):
        """returns cfg as a dict

        The result maps ini section names to ``{option: value}`` dicts of
        strings, in the layout consumed by ``write()``.

        Raises
        ------
        ValueError
            if there are no species, and hence no db names
        """
        # db_names is a generator, which is always truthy, so it has to be
        # materialised before it can be checked for emptiness
        db_names = list(self.db_names)
        if not db_names:
            raise ValueError("no db names")

        data = {
            "remote path": {"path": str(self.remote_path), "host": str(self.host)},
            "local path": {
                "staging_path": str(self.staging_path),
                "install_path": str(self.install_path),
            },
            "release": {"release": self.release},
        }

        # comma separated so that splitting the value on "," recovers the names
        compara = {}
        if self.align_names:
            compara["align_names"] = ",".join(self.align_names)
        if self.tree_names:
            compara["tree_names"] = ",".join(self.tree_names)
        if compara:
            data["compara"] = compara

        # NOTE(review): every species section is written with db "core",
        # whatever db names are recorded in species_dbs -- confirm intended
        for db_name in db_names:
            data[db_name] = {"db": "core"}

        return data

    def write(self):
        """writes a ini to staging_path/DOWNLOADED_CONFIG_NAME

        ``staging_path`` is created if it does not exist.
        """
        parser = configparser.ConfigParser()
        cfg = self.to_dict()
        for section, settings in cfg.items():
            parser.add_section(section)
            for option, val in settings.items():
                parser.set(section, option=option, value=val)
        self.staging_path.mkdir(parents=True, exist_ok=True)
        with (self.staging_path / DOWNLOADED_CONFIG_NAME).open(mode="w") as out:
            parser.write(out, space_around_delimiters=True)
108@dataclass
109class InstalledConfig:
110 release: str
111 install_path: pathlib.Path
113 def __hash__(self):
114 return id(self)
116 def __post_init__(self):
117 self.install_path = pathlib.Path(self.install_path)
119 @property
120 def compara_path(self):
121 return self.install_path / _COMPARA_NAME
123 @property
124 def homologies_path(self):
125 return self.compara_path / _HOMOLOGIES_NAME
127 @property
128 def aligns_path(self):
129 return self.compara_path / _ALIGNS_NAME
131 @property
132 def genomes_path(self):
133 return self.install_path / _GENOMES_NAME
135 def installed_genome(self, species: str) -> pathlib.Path:
136 db_name = Species.get_ensembl_db_prefix(species)
137 return self.genomes_path / db_name
139 def list_genomes(self):
140 """returns list of installed genomes"""
141 return [p.name for p in self.genomes_path.glob("*") if p.name in Species]
143 def path_to_alignment(self, pattern: str) -> pathlib.Path | None:
144 """returns the full path to alignment matching the name
146 Parameters
147 ----------
148 pattern
149 glob pattern for the Ensembl alignment name
150 """
151 align_dirs = [
152 d for d in self.aligns_path.glob("*") if fnmatch.fnmatch(d.name, pattern)
153 ]
154 if not align_dirs:
155 return None
157 if len(align_dirs) > 1:
158 raise ValueError(
159 f"{pattern!r} matches too many directories in {self.aligns_path}"
160 )
162 return align_dirs[0]
def write_installed_cfg(config: Config) -> pathlib.Path:
    """writes an ini file recording the release under config.install_path

    The install directory is created if it does not exist.

    Returns
    -------
    path of the written file, config.install_path / INSTALLED_CONFIG_NAME
    """
    ini = configparser.ConfigParser()
    section = option = "release"
    ini.add_section(section)
    ini.set(section, option, config.release)

    cfg_path = config.install_path / INSTALLED_CONFIG_NAME
    cfg_path.parent.mkdir(parents=True, exist_ok=True)
    with cfg_path.open(mode="w") as handle:
        ini.write(handle)

    return cfg_path
def read_installed_cfg(path: pathlib.Path) -> InstalledConfig:
    """reads the ini file written under an install path

    Parameters
    ----------
    path
        either the path of the INSTALLED_CONFIG_NAME file itself, or the
        directory containing it. A string is also accepted.

    Returns
    -------
    InstalledConfig with the recorded release, and install_path set to the
    directory containing the ini file.

    Raises
    ------
    SystemExit
        with exit code 1, after printing a message, if the file does not exist
    """
    path = pathlib.Path(path)
    if path.name != INSTALLED_CONFIG_NAME:
        path = path / INSTALLED_CONFIG_NAME

    if not path.exists():
        print(f"{str(path)} does not exist, exiting")
        # raise directly instead of calling exit(): that helper is injected
        # by the site module, so it is not available in every runtime, and
        # it closes stdin before raising the same exception
        raise SystemExit(1)

    parser = configparser.ConfigParser()
    parser.read(path)
    release = parser.get("release", "release")
    return InstalledConfig(release=release, install_path=path.parent)
def _split_csv(value) -> list[str]:
    """returns the stripped, non-empty items of a comma separated string"""
    if not value:
        return []
    return [item for item in (part.strip() for part in value.split(",")) if item]


def read_config(config_path) -> Config:
    """returns ensembl release, local path, and db specifics from the provided
    config path

    Parameters
    ----------
    config_path
        path to the ini file; ``~`` is expanded

    Notes
    -----
    The sections "release", "remote path" and "local path" are required.
    An optional "compara" section may define the comma separated options
    "align_names" and "tree_names". Every other section is treated as a
    species, and must define a comma separated "db" option.

    If tree names are given, each tree is downloaded and the species it
    contains are added to the species being configured.
    """
    from ensembl_lite._download import download_ensembl_tree

    parser = configparser.ConfigParser()

    with pathlib.Path(config_path).expanduser().open() as f:
        parser.read_file(f)

    release = parser.get("release", "release")
    host = parser.get("remote path", "host")
    # drop a single trailing "/" from the remote path
    remote_path = parser.get("remote path", "path").removesuffix("/")
    staging_path = (
        pathlib.Path(parser.get("local path", "staging_path")).expanduser().absolute()
    )
    install_path = (
        pathlib.Path(parser.get("local path", "install_path")).expanduser().absolute()
    )

    species_dbs = {}
    align_names = []
    tree_names = []
    for section in parser.sections():
        if section in ("release", "remote path", "local path"):
            continue

        if section == "compara":
            # blank entries (an empty option, a trailing comma) are dropped,
            # otherwise "" would be treated as the name of a tree to download
            align_names = _split_csv(parser.get(section, "align_names", fallback=None))
            tree_names = _split_csv(parser.get(section, "tree_names", fallback=None))
            continue

        dbs = _split_csv(parser.get(section, "db"))

        # handle synonyms
        species = Species.get_species_name(section, level="raise")
        species_dbs[species] = dbs

    # add all species in each tree to species_dbs
    for tree_name in tree_names:
        tree = download_ensembl_tree(host, remote_path, release, tree_name)
        species_dbs.update(species_from_ensembl_tree(tree))

    return Config(
        host=host,
        remote_path=remote_path,
        release=release,
        staging_path=staging_path,
        install_path=install_path,
        species_dbs=species_dbs,
        align_names=align_names,
        tree_names=tree_names,
    )