Coverage for src/epublib/package/manifest.py: 91%

176 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-09-18 16:07 -0300

1import re 

2from dataclasses import dataclass 

3from pathlib import Path 

4from typing import ClassVar, Literal, Self, overload, override 

5 

6import bs4 

7 

8from epublib.exceptions import EPUBError 

9from epublib.identifier import EPUBId 

10from epublib.package.spine import SpineItemRef 

11from epublib.resources import Resource 

12from epublib.util import attr_to_str, get_absolute_href, get_relative_href 

13from epublib.xml_element import XMLElement, XMLParent 

14 

15 

def detect_remote_resources(soup: bs4.BeautifulSoup):
    """Report whether any ``src`` or ``href`` in *soup* looks remote.

    Every element carrying a ``src`` or ``href`` attribute is inspected. A
    reference is treated as remote when it starts with a URL scheme
    (``scheme://``) or with a slash.

    Args:
        soup: Parsed document to scan.

    Returns:
        ``True`` as soon as one matching reference is found, ``False`` if
        none is.
    """
    for attr in "src", "href":
        for tag in soup.select(f"[{attr}]"):
            # Read the attribute the selector matched on. Reading "src"
            # unconditionally would ignore every element that only has an
            # href, so remote links would go undetected.
            ref = attr_to_str(tag.get(attr))
            if ref is None:
                continue

            if re.search(r"^\w+://.*$", ref):
                return True

            if ref.startswith("/"):
                return True

    return False

28 

29 

def detect_manifest_properties(soup: bs4.BeautifulSoup) -> list[str]:
    """Derive manifest property names from the content of *soup*.

    Args:
        soup: Parsed document to inspect.

    Returns:
        The names whose check matched, always in the order ``math``,
        ``remote-resources``, ``scripted``, ``switch``.
    """
    # Each entry pairs a property name with a lazily evaluated probe; the
    # tuple order fixes both the evaluation order and the output order.
    probes = (
        ("math", lambda: soup.find("math")),
        ("remote-resources", lambda: detect_remote_resources(soup)),
        ("scripted", lambda: soup.find("script")),
        ("switch", lambda: soup.find("epub:switch")),
    )

    detected: list[str] = []
    for property_name, probe in probes:
        if probe():
            detected.append(property_name)
    return detected

46 

47 

@dataclass(kw_only=True)
class ManifestItem(XMLElement):
    """A single ``item`` entry of the EPUB manifest.

    The item keeps two views of its location in sync: ``filename`` (stored
    in ``name``) and ``href`` (stored in ``_href``). Both conversions are
    done with ``get_relative_href`` / ``get_absolute_href`` against
    ``manifest_filename``.
    """

    id: EPUBId
    media_type: str
    fallback: str | None = None
    media_overlay: str | None = None
    # ``None`` means "no properties"; remove_property() resets an emptied
    # list back to ``None``.
    properties: list[str] | None = None
    # Backing store for the ``href`` property.
    _href: str = ""
    # Filename of the document that holds the manifest; hrefs are resolved
    # against it.
    manifest_filename: str

    # NOTE(review): both class variables are consumed by XMLElement, which
    # is not visible here — presumably field names to skip when writing the
    # tag, and a field-name -> attribute-name mapping. Confirm there.
    exclude_from_tag: ClassVar[list[str]] = ["tag", "name", "manifest_filename"]
    obj_to_tag: ClassVar[dict[str, str]] = {"_href": "href"}

    @property
    @override
    def tag_name(self):
        """Name of the XML element for this object (``"item"``)."""
        return "item"

    @override
    @classmethod
    def from_tag(
        cls,
        tag: bs4.Tag,
        filename: str = "",
        manifest_filename: str = "",
        **kwargs: str,
    ) -> Self:
        """Build an item from its manifest tag.

        Args:
            tag: The tag to build from.
            filename: Filename of the resource; passed on as ``name``.
            manifest_filename: Filename of the document holding the manifest.
            **kwargs: Accepted for signature compatibility; not forwarded.

        Raises:
            AssertionError: If ``filename`` or ``manifest_filename`` is empty.
        """
        assert filename, "Can't initialize manifest item without absolute filename"
        assert manifest_filename, (
            "Can't initialize manifest item without manifest filename"
        )

        return super().from_tag(
            tag=tag,
            name=filename,
            manifest_filename=manifest_filename,
        )

    @property
    def filename(self):
        """The resource filename (an alias of ``name``)."""
        return self.name

    @filename.setter
    def filename(self, value: str):
        # Store the new filename first, then recompute the href from it.
        self.name: str = value
        relative = get_relative_href(self.manifest_filename, value)
        self._href = relative

    @property
    def href(self):
        """The item's href as stored in ``_href``."""
        return self._href

    @href.setter
    def href(self, value: str):
        # Store the new href first, then recompute the filename from it.
        self._href = value
        absolute = get_absolute_href(self.manifest_filename, value)
        self.name = absolute

    def __post_init__(self):
        """Normalize ``id`` to ``EPUBId`` and fill in a missing href."""
        super().__post_init__()
        self.id = EPUBId(self.id)
        # The attribute is always re-assigned, even when an href was already
        # supplied, exactly as a plain ``x = x or default`` would do.
        resolved = self._href or get_relative_href(self.manifest_filename, self.name)
        self._href = resolved

    def add_property(self, prop: str):
        """Add *prop* to ``properties`` unless it is already present."""
        if self.properties is None:
            self.properties = []
        if prop in self.properties:
            return
        self.properties.append(prop)

    def has_property(self, prop: str) -> bool:
        """Return whether *prop* is listed in ``properties``."""
        return self.properties is not None and prop in self.properties

    def remove_property(self, prop: str):
        """Remove *prop* if present; an emptied list becomes ``None``."""
        if self.properties is None:
            return

        if prop in self.properties:
            self.properties.remove(prop)

        if not self.properties:
            self.properties = None

132 

133 

class BookManifest(XMLParent[ManifestItem]):
    """The EPUB manifest, which is a list of all resources in the book.

    Items can be looked up by filename, ``Path``, ``EPUBId``, ``Resource`` or
    ``SpineItemRef``. The items carrying the ``nav`` and ``cover-image``
    properties are cached in ``_nav`` / ``_cover_image``; a cache value of
    ``None`` means "rescan the items on next access".
    """

    def __init__(self, tag: bs4.Tag, filename: str) -> None:
        """Create the manifest from its tag.

        Args:
            tag: The manifest tag.
            filename: Filename of the document containing the manifest; item
                hrefs are resolved against it.
        """
        # Set before delegating to the parent initializer.
        # NOTE(review): presumably XMLParent.__init__ calls create_items(),
        # which reads and writes these attributes — confirm in XMLParent.
        self._resource_filename: str = filename
        self._nav: ManifestItem | None = None
        self._cover_image: ManifestItem | None = None

        super().__init__(tag)

    @override
    def create_items(self) -> list[ManifestItem]:
        """Build one ``ManifestItem`` per ``item`` tag and prime the caches."""
        items: list[ManifestItem] = []

        for tag in self.tag.select("item"):
            absolute_href = get_absolute_href(
                self._resource_filename,
                attr_to_str(tag["href"]),
            )
            item = ManifestItem.from_tag(
                tag,
                absolute_href,
                manifest_filename=self._resource_filename,
            )
            items.append(item)

            if item.properties:
                if "nav" in item.properties:
                    self._nav = item

                if "cover-image" in item.properties:
                    self._cover_image = item

        return items

    def _first_with_property(self, prop: str) -> ManifestItem | None:
        """Return the first item whose properties include *prop*, if any."""
        return next(
            (item for item in self.items if item.properties and prop in item.properties),
            None,
        )

    @property
    def nav(self):
        """The item with the ``nav`` property, or ``None`` if there is none."""
        if self._nav is None:
            self._nav = self._first_with_property("nav")
        return self._nav

    @property
    def cover_image(self):
        """The item with the ``cover-image`` property, or ``None``."""
        if self._cover_image is None:
            self._cover_image = self._first_with_property("cover-image")
        return self._cover_image

    def set_cover_image(self, item: ManifestItem | str | Path | EPUBId):
        """Mark *item* as the cover image and unmark every other item.

        Args:
            item: The item itself, its id, or its filename.

        Raises:
            KeyError: If *item* is an id or filename not in the manifest.
        """
        if not isinstance(item, ManifestItem):
            if isinstance(item, EPUBId):
                item = self._get_by_id(item, raise_error=True)
            else:
                item = self[item]

        item.add_property("cover-image")
        for other in self.items:
            if other is not item:
                other.remove_property("cover-image")

        # Drop the cached cover so ``cover_image`` rescans the items; without
        # this it would keep returning the previous, now unmarked, item.
        self._cover_image = None

    @override
    def add_item(self, item: ManifestItem) -> ManifestItem:
        """Add *item*, rejecting duplicates.

        Raises:
            EPUBError: If the item, its id, or its filename is already in the
                manifest. An id clash is reported in preference to a filename
                clash.
        """
        if item in self.items:
            raise EPUBError(f"Item {item} is already in the manifest")

        if any(item.id == other.id for other in self.items):
            raise EPUBError(f"An item with id {item.id} is already in the manifest")

        if any(item.filename == other.filename for other in self.items):
            raise EPUBError(
                f"An item with filename {item.filename} is already in the manifest"
            )

        return super().add_item(item)

    @overload
    def _get_by_id(self, id: EPUBId, raise_error: Literal[True]) -> ManifestItem: ...

    @overload
    def _get_by_id(
        self,
        id: EPUBId,
        raise_error: bool = False,
    ) -> ManifestItem | None: ...

    def _get_by_id(self, id: EPUBId, raise_error: bool = False):
        """Return the first item whose id equals *id*.

        Returns ``None`` when nothing matches, unless *raise_error* is true,
        in which case ``KeyError`` is raised instead.
        """
        try:
            return next(item for item in self.items if item.id == id)
        except StopIteration as exception:
            if raise_error:
                raise KeyError(id) from exception
            return None

    @override
    def __getitem__(self, name: Path | str | EPUBId | SpineItemRef):
        """Like :meth:`get`, but raise ``KeyError`` when nothing matches."""
        value = self.get(name)
        if value is None:
            raise KeyError(name)
        return value

    @override
    def get(
        self,
        name: str | Path | Resource | SpineItemRef,
        cls: type[XMLElement] | None = None,
    ):
        """Look up an item by filename, id, resource or spine reference.

        An ``EPUBId`` or ``SpineItemRef`` (via its ``idref``) is first
        resolved to the matching item's filename; a ``Resource`` contributes
        its ``filename``. The lookup itself is delegated to the parent class
        using the string form of the name.

        Args:
            name: What to look up.
            cls: Accepted for signature compatibility; not forwarded to the
                parent lookup.

        Returns:
            ``None`` when an id lookup finds nothing; otherwise whatever the
            parent lookup returns.
        """
        if isinstance(name, (EPUBId, SpineItemRef)):
            if isinstance(name, SpineItemRef):
                name = name.idref
            item = self._get_by_id(name, raise_error=False)
            if item is None:
                return None
            name = item.filename

        elif isinstance(name, Resource):
            name = name.filename

        return super().get(str(name))

    def remove(self, filename: str | EPUBId):
        """Remove the item identified by *filename* (a filename or an id).

        Raises:
            KeyError: If no such item is in the manifest.
        """
        if isinstance(filename, EPUBId):
            filename = self._get_by_id(filename, raise_error=True).filename
        item = self[filename]

        # Do not let a cache keep pointing at an item that is leaving the
        # manifest; a cleared cache is simply rebuilt on next access.
        if item is self._nav:
            self._nav = None
        if item is self._cover_image:
            self._cover_image = None

        return self.remove_item(item)

    def get_new_id(self, filename: str | Path):
        """Propose an id for *filename* that is not yet used in the manifest.

        The first candidate is the file's base name (stem plus suffix). While
        a candidate is taken, the next one is ``<stem>-<i><suffix>`` with
        ``i`` counting up from 2.

        The search stops once ``i`` reaches 1000; the last candidate is then
        returned even if it is still in use.
        """
        path = Path(filename)
        stem = path.stem
        suffix = path.suffix

        new_id = f"{stem}{suffix}"

        i = 1
        while self._get_by_id(EPUBId(new_id)) is not None and i < 1000:
            i += 1
            # Build from the stem each time. Appending to the previous
            # candidate would compound into ids like "a.png-2.png-3.png".
            new_id = f"{stem}-{i}{suffix}"

        return EPUBId(new_id)

286 

287 return EPUBId(new_id)