Coverage for src/epublib/package/manifest.py: 91%
176 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-18 16:07 -0300
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-18 16:07 -0300
1import re
2from dataclasses import dataclass
3from pathlib import Path
4from typing import ClassVar, Literal, Self, overload, override
6import bs4
8from epublib.exceptions import EPUBError
9from epublib.identifier import EPUBId
10from epublib.package.spine import SpineItemRef
11from epublib.resources import Resource
12from epublib.util import attr_to_str, get_absolute_href, get_relative_href
13from epublib.xml_element import XMLElement, XMLParent
def detect_remote_resources(soup: bs4.BeautifulSoup) -> bool:
    """Report whether the document references a resource by absolute location.

    Every element carrying a ``src`` or ``href`` attribute is inspected. A
    reference counts as remote when it starts with a URL scheme
    (``scheme://...``) or with a leading ``/``.

    Args:
        soup: Parsed document to scan.

    Returns:
        ``True`` as soon as one matching reference is found, else ``False``.
    """
    for attr in "src", "href":
        for tag in soup.select(f"[{attr}]"):
            # Outbound hyperlinks are navigation targets, not resources the
            # document loads, so they must not trigger "remote-resources".
            if attr == "href" and tag.name in ("a", "area"):
                continue

            # Read the attribute that was actually selected on; the previous
            # code always read "src", so "href" values were never examined.
            ref = attr_to_str(tag.get(attr))
            if ref is None:
                continue

            if re.search(r"^\w+://.*$", ref):
                return True

            if ref.startswith("/"):
                return True

    return False
def detect_manifest_properties(soup: bs4.BeautifulSoup) -> list[str]:
    """Collect the manifest property names that apply to a parsed document.

    The result lists, in this fixed order, each of ``math``,
    ``remote-resources``, ``scripted`` and ``switch`` whose corresponding
    check on ``soup`` is truthy.

    Args:
        soup: Parsed document to inspect.

    Returns:
        The applicable property names; empty when none apply.
    """
    # Each probe is evaluated eagerly and in order, one per property.
    probes = (
        ("math", soup.find("math")),
        ("remote-resources", detect_remote_resources(soup)),
        ("scripted", soup.find("script")),
        ("switch", soup.find("epub:switch")),
    )
    return [label for label, hit in probes if hit]
@dataclass(kw_only=True)
class ManifestItem(XMLElement):
    """One ``item`` entry of the EPUB manifest.

    The entry keeps two views of its location in sync: ``filename`` (stored
    in ``name``) and ``href`` (stored in ``_href``). Assigning either one
    recomputes the other against ``manifest_filename`` through
    ``get_relative_href`` / ``get_absolute_href``.
    """

    id: EPUBId
    media_type: str
    fallback: str | None = None
    media_overlay: str | None = None
    properties: list[str] | None = None
    _href: str = ""
    manifest_filename: str

    # NOTE(review): presumably consumed by XMLElement when mapping attributes
    # to and from the underlying tag -- confirm against XMLElement.
    exclude_from_tag: ClassVar[list[str]] = ["tag", "name", "manifest_filename"]
    obj_to_tag: ClassVar[dict[str, str]] = {"_href": "href"}

    def __post_init__(self):
        """Coerce ``id`` to ``EPUBId`` and derive ``_href`` when it is empty."""
        super().__post_init__()
        self.id = EPUBId(self.id)
        if not self._href:
            self._href = get_relative_href(self.manifest_filename, self.name)

    @property
    @override
    def tag_name(self):
        """Name of the XML element backing this object."""
        return "item"

    @override
    @classmethod
    def from_tag(
        cls,
        tag: bs4.Tag,
        filename: str = "",
        manifest_filename: str = "",
        **kwargs: str,
    ) -> Self:
        """Build an item from ``tag``.

        Both ``filename`` and ``manifest_filename`` must be non-empty; the
        defaults exist only to match the overridden signature. Extra keyword
        arguments are accepted but not forwarded.
        """
        assert filename, "Can't initialize manifest item without absolute filename"
        assert (
            manifest_filename
        ), "Can't initialize manifest item without manifest filename"

        init_args = {"tag": tag, "name": filename, "manifest_filename": manifest_filename}
        return super().from_tag(**init_args)

    @property
    def filename(self):
        """The item's filename, as stored in ``name``."""
        return self.name

    @filename.setter
    def filename(self, value: str):
        self.name: str = value
        # Keep the href in step with the new filename.
        self._href = get_relative_href(self.manifest_filename, value)

    @property
    def href(self):
        """The item's href, as stored in ``_href``."""
        return self._href

    @href.setter
    def href(self, value: str):
        self._href = value
        # Keep the filename in step with the new href.
        self.name = get_absolute_href(self.manifest_filename, value)

    def add_property(self, prop: str):
        """Add ``prop`` to ``properties`` unless it is already present."""
        if self.properties is None:
            self.properties = [prop]
            return
        if prop not in self.properties:
            self.properties.append(prop)

    def has_property(self, prop: str) -> bool:
        """Return whether ``prop`` is among this item's properties."""
        return self.properties is not None and prop in self.properties

    def remove_property(self, prop: str):
        """Remove ``prop`` if present; an emptied list collapses to ``None``."""
        current = self.properties
        if current is None:
            return

        if prop in current:
            current.remove(prop)

        if not current:
            self.properties = None
class BookManifest(XMLParent[ManifestItem]):
    """The EPUB manifest, which is a list of all resources in the book.

    Besides the items themselves, the manifest caches the items flagged with
    the ``nav`` and ``cover-image`` properties so they can be returned
    without rescanning.
    """

    def __init__(self, tag: bs4.Tag, filename: str) -> None:
        """Wrap the manifest ``tag`` found in the package file ``filename``."""
        # Set before calling the parent initialiser: ``create_items`` reads
        # ``_resource_filename`` and fills both caches (presumably it is
        # invoked from ``XMLParent.__init__`` -- confirm).
        self._resource_filename: str = filename
        self._nav: ManifestItem | None = None
        self._cover_image: ManifestItem | None = None

        super().__init__(tag)

    @override
    def create_items(self) -> list[ManifestItem]:
        """Build a ``ManifestItem`` for every ``item`` element under the tag.

        While building, the items carrying ``nav`` / ``cover-image`` are
        recorded in the caches (the last match wins).
        """
        items: list[ManifestItem] = []

        for tag in self.tag.select("item"):
            absolute_href = get_absolute_href(
                self._resource_filename,
                attr_to_str(tag["href"]),
            )
            item = ManifestItem.from_tag(
                tag,
                absolute_href,
                manifest_filename=self._resource_filename,
            )
            items.append(item)

            if item.properties:
                if "nav" in item.properties:
                    self._nav = item

                if "cover-image" in item.properties:
                    self._cover_image = item

        return items

    def _first_with_property(self, prop: str) -> ManifestItem | None:
        """Return the first item whose properties contain ``prop``, if any."""
        return next(
            (item for item in self.items if item.properties and prop in item.properties),
            None,
        )

    @property
    def nav(self):
        """The item flagged ``nav``, or ``None`` when there is none."""
        if self._nav is None:
            self._nav = self._first_with_property("nav")
        return self._nav

    @property
    def cover_image(self):
        """The item flagged ``cover-image``, or ``None`` when there is none."""
        if self._cover_image is None:
            self._cover_image = self._first_with_property("cover-image")
        return self._cover_image

    def set_cover_image(self, item: ManifestItem | str | Path | EPUBId):
        """Make ``item`` the only manifest entry flagged ``cover-image``.

        Args:
            item: The item itself, its id, or a name accepted by
                ``__getitem__``.

        Raises:
            KeyError: If ``item`` is not an item and cannot be resolved.
        """
        if not isinstance(item, ManifestItem):
            if isinstance(item, EPUBId):
                item = self._get_by_id(item, raise_error=True)
            else:
                item = self[item]

        item.add_property("cover-image")
        for other in self.items:
            if other is not item:
                other.remove_property("cover-image")

        # Refresh the cache; otherwise ``cover_image`` would keep returning
        # the previously cached item.
        self._cover_image = item

    @override
    def add_item(self, item: ManifestItem) -> ManifestItem:
        """Add ``item`` after checking it does not clash with existing items.

        Raises:
            EPUBError: If the item, its id, or its filename is already in the
                manifest. An id clash is reported before a filename clash.
        """
        if item in self.items:
            raise EPUBError(f"Item {item} is already in the manifest")

        if any(item.id == other.id for other in self.items):
            raise EPUBError(f"An item with id {item.id} is already in the manifest")

        if any(item.filename == other.filename for other in self.items):
            raise EPUBError(
                f"An item with filename {item.filename} is already in the manifest"
            )

        return super().add_item(item)

    @overload
    def _get_by_id(self, id: EPUBId, raise_error: Literal[True]) -> ManifestItem: ...

    @overload
    def _get_by_id(
        self,
        id: EPUBId,
        raise_error: bool = False,
    ) -> ManifestItem | None: ...

    def _get_by_id(self, id: EPUBId, raise_error: bool = False):
        """Return the first item whose id equals ``id``.

        Returns ``None`` when nothing matches, unless ``raise_error`` is
        true, in which case ``KeyError`` is raised instead.
        """
        try:
            return next(item for item in self.items if item.id == id)
        except StopIteration as exception:
            if raise_error:
                raise KeyError(id) from exception
            return None

    @override
    def __getitem__(self, name: Path | str | EPUBId | SpineItemRef):
        """Like ``get`` but raise ``KeyError`` when nothing is found."""
        value = self.get(name)
        if value is None:
            raise KeyError(name)
        return value

    @override
    def get(
        self,
        name: str | Path | EPUBId | Resource | SpineItemRef,
        cls: type[XMLElement] | None = None,
    ):
        """Look up an item by filename, id, resource, or spine reference.

        Ids and spine references are resolved to the matching item's filename
        first; resources contribute their ``filename``. The resulting name is
        passed, as a string, to the parent ``get``. ``cls`` is accepted for
        signature compatibility and is not used here.

        Returns:
            ``None`` when an id or spine reference matches no item; otherwise
            whatever the parent lookup returns.
        """
        if isinstance(name, (EPUBId, SpineItemRef)):
            if isinstance(name, SpineItemRef):
                name = name.idref
            item = self._get_by_id(name, raise_error=False)
            if item is None:
                return None
            name = item.filename

        elif isinstance(name, Resource):
            name = name.filename

        return super().get(str(name))

    def remove(self, filename: str | EPUBId):
        """Remove the item identified by ``filename`` (a filename or an id).

        Raises:
            KeyError: If no matching item exists.
        """
        if isinstance(filename, EPUBId):
            filename = self._get_by_id(filename, raise_error=True).filename

        item = self[filename]
        result = self.remove_item(item)

        # Drop cache entries that point at the removed item; the properties
        # recompute lazily from the remaining items.
        if self._nav is item:
            self._nav = None
        if self._cover_image is item:
            self._cover_image = None

        return result

    def get_new_id(self, filename: str | Path):
        """Propose an id for ``filename`` that no current item uses.

        The first candidate is the file's name (stem plus suffix). On a
        clash, ``-2``, ``-3``, ... is inserted between stem and suffix.

        The search stops after the counter reaches 1000; in that case the
        last candidate is returned even if it is still taken.
        """
        path = Path(filename)
        stem = path.stem
        suffix = path.suffix

        new_id = f"{stem}{suffix}"

        i = 1
        while self._get_by_id(EPUBId(new_id)) and i < 1000:
            i += 1
            # Rebuild from the stem each time; appending to the previous
            # candidate produced ids like "a.x-2.x-3.x".
            new_id = f"{stem}-{i}{suffix}"

        return EPUBId(new_id)