Coverage for src/epublib/package/manifest.py: 98%
121 statements
« prev ^ index » next coverage.py v7.10.7, created at 2025-10-06 17:34 -0300
« prev ^ index » next coverage.py v7.10.7, created at 2025-10-06 17:34 -0300
1import re
2from dataclasses import dataclass
3from pathlib import Path
4from typing import Annotated, ClassVar, Literal, SupportsIndex, overload, override
6import bs4
8from epublib.exceptions import EPUBError
9from epublib.identifier import EPUBId
10from epublib.package.spine import SpineItemRef
11from epublib.resources import Resource
12from epublib.util import attr_to_str, strip_fragment
13from epublib.xml_element import (
14 HrefElement,
15 ParentOfHref,
16 XMLAttribute,
17 XMLElement,
18)
def detect_remote_resources(soup: bs4.BeautifulSoup):
    """Report whether the document references anything flagged as remote.

    Every element carrying a ``src`` or ``href`` attribute is inspected.
    The attribute value is converted with ``attr_to_str``; values that come
    back as ``None`` are skipped. A reference is flagged when it begins with
    a ``scheme://`` prefix or with a leading ``/``.

    Returns:
        ``True`` as soon as one flagged reference is found, ``False`` when
        no inspected reference is flagged.
    """
    for attribute in ("src", "href"):
        for element in soup.find_all(attrs={attribute: True}):
            target = attr_to_str(element.get(attribute))
            if target is None:
                continue

            # Either form is enough to flag the reference.
            if target.startswith("/") or re.search(r"^\w+://.*$", target):
                return True

    return False
def detect_manifest_properties(soup: bs4.BeautifulSoup) -> list[str]:
    """Derive manifest ``properties`` values from a parsed document.

    The checks run in a fixed order and each one contributes at most one
    entry to the result:

    * ``mathml`` when a ``math`` element is present,
    * ``remote-resources`` when ``detect_remote_resources`` returns true,
    * ``scripted`` when a ``script`` element is present,
    * ``switch`` when an ``epub:switch`` element is present.

    Returns:
        The detected property names, in the order listed above.
    """
    # Dict insertion order doubles as the output order.
    checks = {
        "mathml": soup.find("math"),
        "remote-resources": detect_remote_resources(soup),
        "scripted": soup.find("script"),
        "switch": soup.find("epub:switch"),
    }
    return [name for name, found in checks.items() if found]
@dataclass(kw_only=True)
class ManifestItem(HrefElement):
    """One ``item`` element of the EPUB manifest.

    The annotated fields map to XML attributes of the element. The helper
    methods keep the ``properties`` list and the underlying tag in step by
    calling ``update_tag`` after each change.
    """

    # Identifier of the item; coerced to ``EPUBId`` in ``__post_init__``.
    id: Annotated[EPUBId, XMLAttribute()]
    # Stored under the ``media-type`` XML attribute.
    media_type: Annotated[str, XMLAttribute("media-type")]
    fallback: Annotated[str | None, XMLAttribute()] = None
    # Stored under the ``media-overlay`` XML attribute.
    media_overlay: Annotated[str | None, XMLAttribute("media-overlay")] = None
    # ``None`` stands for "no properties"; ``remove_property`` resets the
    # field to ``None`` once the list becomes empty.
    properties: Annotated[list[str] | None, XMLAttribute()] = None

    tag_name: ClassVar[str] = "item"

    def __post_init__(self):
        """Run the parent initialisation, then coerce ``id`` to ``EPUBId``."""
        super().__post_init__()
        self.id = EPUBId(self.id)

    def add_property(self, prop: str):
        """Add ``prop`` to ``properties`` unless it is already listed.

        The list is created on first use, and the ``properties`` attribute
        of the tag is refreshed in every case.
        """
        if self.properties is None:
            self.properties = []

        already_listed = prop in self.properties
        if not already_listed:
            self.properties.append(prop)

        self.update_tag("properties", self.properties)

    def has_property(self, prop: str) -> bool:
        """Return whether ``prop`` is currently listed in ``properties``."""
        return self.properties is not None and prop in self.properties

    def remove_property(self, prop: str):
        """Drop ``prop`` from ``properties`` if it is listed.

        Does nothing at all when ``properties`` is ``None``. Otherwise the
        tag is refreshed, and an emptied list is replaced by ``None`` first.
        """
        if self.properties is None:
            return

        if prop in self.properties:
            self.properties.remove(prop)

        if not self.properties:
            self.properties = None

        self.update_tag("properties", self.properties)
# Any value accepted by BookManifest.get(), BookManifest.__getitem__() and
# BookManifest.remove() to designate a manifest item. In get(), an EPUBId or
# a SpineItemRef (through its ``idref``) is looked up by item id, a Resource
# is looked up by its ``filename``, and any other value is used as the
# filename itself.
type ItemIdentifier = str | Path | Resource | SpineItemRef | EPUBId
class BookManifest(ParentOfHref[ManifestItem]):
    """The EPUB manifest: the list of every resource declared by the book.

    Items can be addressed by position, filename, id, spine reference or
    resource object (see ``__getitem__`` and ``get``).
    """

    def __post_init__(self) -> None:
        """Run the parent initialisation and reset ``_cover_image``."""
        super().__post_init__()
        self._cover_image: ManifestItem | None = None

    def _items_with_property(self, prop: str):
        """Lazily yield the items whose ``properties`` contain ``prop``."""
        return (
            item for item in self.items if item.properties and prop in item.properties
        )

    @property
    def nav(self):
        """The first item carrying the ``nav`` property.

        Raises:
            EPUBError: If no item in the manifest has that property.
        """
        try:
            return next(self._items_with_property("nav"))
        except StopIteration as error:
            raise EPUBError("No navigation document found in manifest") from error

    @property
    def cover_image(self):
        """The first item carrying the ``cover-image`` property, or ``None``."""
        return next(self._items_with_property("cover-image"), None)

    @override
    def add_item(self, item: ManifestItem) -> ManifestItem:
        """Append ``item`` after checking that it does not clash.

        Raises:
            EPUBError: If an equal item, an item with the same id, or an
                item with the same filename is already present. The checks
                are made in that order.
        """
        if item in self.items:
            raise EPUBError(f"Item {item} is already in the manifest")

        # An id clash is reported in preference to a filename clash.
        if any(item.id == other.id for other in self.items):
            raise EPUBError(f"An item with id {item.id} is already in the manifest")

        if any(item.filename == other.filename for other in self.items):
            raise EPUBError(
                f"An item with filename {item.filename} is already in the manifest"
            )

        return super().add_item(item)

    @overload
    def _get_by_id(self, id: EPUBId, raise_error: Literal[True]) -> ManifestItem: ...

    @overload
    def _get_by_id(
        self,
        id: EPUBId,
        raise_error: bool = False,
    ) -> ManifestItem | None: ...

    def _get_by_id(self, id: EPUBId, raise_error: bool = False):
        """Return the first item whose ``id`` equals ``id``.

        When nothing matches, ``None`` is returned unless ``raise_error`` is
        true, in which case ``KeyError(id)`` is raised.
        """
        candidates = (item for item in self.items if item.id == id)
        try:
            return next(candidates)
        except StopIteration as exception:
            if not raise_error:
                return None
            raise KeyError(id) from exception

    @override
    def __getitem__(
        self,
        name: ItemIdentifier | SupportsIndex,
    ):
        """Look an item up by position or by identifier.

        Values satisfying ``SupportsIndex`` are delegated to the parent
        class; anything else goes through ``get``.

        Raises:
            KeyError: If ``get`` finds no item for ``name``.
        """
        if not isinstance(name, SupportsIndex):
            found = self.get(name)
            if found is None:
                raise KeyError(name)
            return found

        return super().__getitem__(name)

    @override
    def get(  # type: ignore[reportIncompatibleMethodOverride]
        self,
        name: ItemIdentifier,
        cls: type[XMLElement] | None = None,
        ignore_fragment: bool = True,
    ):
        """Find the item designated by ``name``.

        ``EPUBId`` and ``SpineItemRef`` values (the latter through their
        ``idref``) are first resolved to an item by id, and that item's
        filename is used for the lookup. ``Resource`` values contribute
        their ``filename``. Other values are used as they are.

        Args:
            name: Identifier of the wanted item.
            cls: Accepted in the signature; not used by this override.
            ignore_fragment: When true, the name is passed through
                ``strip_fragment`` before the lookup.

        Returns:
            ``None`` when an id lookup finds nothing; otherwise whatever the
            parent ``get`` returns for the resulting filename.
        """
        if isinstance(name, (EPUBId, SpineItemRef)):
            wanted_id = name.idref if isinstance(name, SpineItemRef) else name
            by_id = self._get_by_id(wanted_id, raise_error=False)
            if by_id is None:
                return None
            name = by_id.filename
        elif isinstance(name, Resource):
            name = name.filename

        if ignore_fragment:
            name = strip_fragment(name)

        # The fragment has been dealt with above, so the parent must not
        # strip it a second time.
        return super().get(str(name), ignore_fragment=False)

    @override
    def remove(self, filename: ItemIdentifier, ignore_fragment: bool = True):
        """Remove the item designated by ``filename``, if there is one.

        Returns:
            The result of ``remove_item`` when an item was found, otherwise
            ``None``.
        """
        target = self.get(filename, ignore_fragment=ignore_fragment)
        if not target:
            return None
        return self.remove_item(target)