Coverage for src/epublib/ncx/__init__.py: 91%

242 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-09-18 16:07 -0300

1from abc import ABC, abstractmethod 

2from collections.abc import Sequence 

3from dataclasses import dataclass 

4from operator import attrgetter 

5from typing import ClassVar, Self, override 

6 

7import bs4 

8 

9from epublib.exceptions import EPUBError 

10from epublib.nav.util import PageBreakData, TOCEntryData 

11from epublib.reference import NavigationReference, NavigationRoot 

12from epublib.soup import NCXSoup 

13from epublib.util import attr_to_str, get_relative_href, parse_int 

14from epublib.xml_element import ValueType, XMLElement, XMLParent 

15 

16 

@dataclass(kw_only=True)
class NCXMeta(XMLElement):
    """One ``<meta>`` entry of the NCX ``<head>`` section.

    Fields are keyword-only. NCXHead stores entries such as
    ``name="dtb:uid"`` with the corresponding value in ``content``.
    """

    # Identifier of the entry, e.g. "dtb:uid" or "dtb:depth".
    name: str
    # Value of the entry, always kept as a string.
    content: str

    @property
    @override
    def tag_name(self):
        """Name of the XML element that represents this item."""
        return "meta"

    @override
    @classmethod
    def from_tag(cls, tag: bs4.Tag, **kwargs: str) -> Self:
        """Build an instance from an existing ``<meta>`` tag.

        Extra keyword arguments are accepted but intentionally not forwarded
        to the parent implementation.
        """
        return super().from_tag(tag=tag)

33 

34 

class NCXHead(XMLParent[NCXMeta]):
    """The head section of the NCX file.

    Wraps a ``<head>`` tag and exposes its ``<meta>`` descendants as
    :class:`NCXMeta` items, with typed accessors for the ``dtb:uid``,
    ``dtb:depth``, ``dtb:totalPageCount`` and ``dtb:maxPageNumber`` entries.
    """

    def __init__(self, tag: bs4.Tag, filename: str) -> None:
        """Wrap *tag*, which must be a ``<head>`` element.

        Raises:
            EPUBError: If *tag* is not named ``head``.
        """
        if tag.name != "head":
            raise EPUBError("NCXHead tag must be a <head> element")

        # Set before the parent initializer runs, which may already need it.
        self._resource_filename: str = filename
        super().__init__(tag)

    @override
    def create_items(self) -> list[NCXMeta]:
        """Return one :class:`NCXMeta` per ``meta`` tag found in the head."""
        return [NCXMeta.from_tag(meta_tag) for meta_tag in self.tag.select("meta")]

    def add(self, name: str, content: str) -> NCXMeta:
        """Add a new meta item to the head section."""
        return self.add_item(NCXMeta(name=name, content=content))

    def _require_meta(self, name: str) -> NCXMeta:
        """Return the meta item called *name* or raise :class:`EPUBError`."""
        try:
            return self[name]
        except KeyError as error:
            raise EPUBError(f"Expected '{name}' in NCX head") from error

    def _set_meta(self, name: str, content: str) -> None:
        """Update the meta item called *name*, creating it when absent."""
        meta = self.get(name)
        if meta:
            meta.content = content
        else:
            __ = self.add(name=name, content=content)

    @staticmethod
    def _page_number(content: str) -> int | None:
        """Parse a page number, mapping ``0`` (no navigable pages) to None."""
        number = int(content)
        return None if number == 0 else number

    @staticmethod
    def _page_number_content(value: int | None) -> str:
        """Serialize a page number, mapping None to ``"0"``."""
        return "0" if value is None else str(value)

    @property
    def uid(self) -> str:
        """The unique identifier of the publication.

        Raises:
            EPUBError: If the head has no ``dtb:uid`` entry.
        """
        return self._require_meta("dtb:uid").content

    @uid.setter
    def uid(self, value: str) -> None:
        self._set_meta("dtb:uid", value)

    @property
    def depth(self) -> int:
        """The depth of the navigation map structure.

        Raises:
            EPUBError: If the head has no ``dtb:depth`` entry.
        """
        return int(self._require_meta("dtb:depth").content)

    @depth.setter
    def depth(self, value: int) -> None:
        self._set_meta("dtb:depth", str(value))

    @property
    def total_page_count(self) -> int | None:
        """
        Total page count of the publication. If there are no navigable
        pages (represented as 0), return None.

        Raises:
            EPUBError: If the head has no ``dtb:totalPageCount`` entry.
        """
        return self._page_number(self._require_meta("dtb:totalPageCount").content)

    @total_page_count.setter
    def total_page_count(self, value: int | None) -> None:
        self._set_meta("dtb:totalPageCount", self._page_number_content(value))

    @property
    def max_page_number(self) -> int | None:
        """
        Largest value attribute on page targets in the page list. If
        there are no navigable pages (represented as 0), return None.

        Raises:
            EPUBError: If the head has no ``dtb:maxPageNumber`` entry. The
                underlying ``KeyError`` is chained as the cause, matching
                the other ``dtb:*`` accessors.
        """
        return self._page_number(self._require_meta("dtb:maxPageNumber").content)

    @max_page_number.setter
    def max_page_number(self, value: int | None) -> None:
        self._set_meta("dtb:maxPageNumber", self._page_number_content(value))

140 

141 

class NCXDocData(XMLElement, ABC):
    """
    Abstract base class for NCX docTitle or docAuthor elements.

    The displayed value is stored in a nested ``<text>`` child of the
    element and is exposed as ``name`` (also reachable through ``text``).
    """

    id: str | None = None

    # NOTE(review): presumably the field names the parent class must not map
    # to tag attributes -- confirm against XMLElement.
    exclude_from_tag: ClassVar[list[str]] = ["tag", "name", "text"]

    @override
    @classmethod
    def from_tag(cls, tag: bs4.Tag, **kwargs: str) -> Self:
        """Build an instance from *tag*, reading the name from its ``<text>``.

        An empty ``<text>`` element yields an empty name.

        Raises:
            EPUBError: If *tag* contains no ``<text>`` element. The message
                names the offending tag, so it is accurate for both
                docTitle and docAuthor.
        """
        text = tag.select_one("text")

        if text is None:
            raise EPUBError(f"{tag.name} tag must contain a <text> element")

        return super().from_tag(
            tag=tag,
            name=text.string or "",
        )

    @override
    def create_tag(self, soup: bs4.BeautifulSoup, **kwargs: str) -> bs4.Tag:
        """Create the element's tag and append a ``<text>`` child holding the name."""
        tag = super().create_tag(soup, **kwargs)
        text = soup.new_tag("text")
        text.string = self.name
        __ = tag.append(text)

        return tag

    @override
    def update_tag(self, field: str, value: ValueType | None):
        """Mirror a field change onto the tag.

        ``name`` is written into the ``<text>`` child (when one exists);
        every other field is delegated to the parent class.
        """
        if field != "name":
            super().update_tag(field, value)
            return

        text = self.tag.select_one("text")
        if text:
            # A missing value becomes an empty string rather than the
            # literal text "None", mirroring how from_tag reads it back.
            text.string = "" if value is None else str(value)

    @property
    def text(self) -> str:
        """Alias for ``name``."""
        return self.name

    @text.setter
    def text(self, value: str) -> None:
        self.name: str = value

    @abstractmethod
    def insert_self_in_soup(self, soup: NCXSoup):
        """Insert this element's tag at the right place in *soup*."""
        pass

193 

194 

class NCXAuthor(NCXDocData):
    """Authorship in the NCX file."""

    @property
    @override
    def tag_name(self):
        """Name of the XML element that represents this item."""
        return "docAuthor"

    @override
    def insert_self_in_soup(self, soup: NCXSoup):
        """Insert this element's tag into *soup*.

        The tag is placed right after the last existing ``docAuthor`` or
        ``docTitle`` element. When the document has neither, it is placed
        right after ``<head>`` instead of failing with an ``IndexError``.

        Raises:
            EPUBError: If there is no ``docAuthor``, ``docTitle`` or
                ``head`` element to anchor the insertion.
        """
        anchors = soup.select("docAuthor, docTitle")
        previous_tag = anchors[-1] if anchors else soup.head
        if previous_tag is None:
            raise EPUBError("Invalid NCX file: couldn't find 'head' tag")
        __ = previous_tag.insert_after(self.tag)

207 

208 

class NCXTitle(NCXDocData):
    """Title in the NCX file."""

    @property
    @override
    def tag_name(self):
        """Name of the XML element that represents this item."""
        return "docTitle"

    @override
    def insert_self_in_soup(self, soup: NCXSoup):
        """Insert this element's tag into *soup*, right after ``<head>``.

        Raises:
            EPUBError: If the document has no ``<head>`` element (previously
                this surfaced as an ``AttributeError`` on ``None``).
        """
        previous_tag = soup.head
        if previous_tag is None:
            raise EPUBError("Invalid NCX file: couldn't find 'head' tag")
        __ = previous_tag.insert_after(self.tag)

221 

222 

class NCXNavPoint(NavigationReference["NCXNavPoint"]):
    """A navigation point in NCX table of contents.

    The label lives in ``navLabel > text`` and the target in the ``src``
    attribute of a ``content`` child, as described by the selectors below.
    """

    tag_name: str = "navPoint"
    text_selector: str = "& > navLabel > text"
    text_tag_name: str = "text"
    href_selector: str = "& > content"
    href_tag_name: str = "content"
    href_attr: str = "src"

    @property
    def play_order(self) -> int | None:
        """Value of the ``playOrder`` attribute, parsed with ``parse_int``."""
        raw_value = attr_to_str(self.tag.get("playOrder"))
        return parse_int(raw_value)

    @play_order.setter
    def play_order(self, value: int | None) -> None:
        if value is not None:
            self.tag["playOrder"] = str(value)
            return

        # None means "no playOrder": drop the attribute if it is there.
        self.tag.attrs.pop("playOrder", None)

    @override
    def _create_text(self, value: str) -> bs4.Tag:
        """Create ``<navLabel><text>value</text></navLabel>`` as first child.

        Returns the inner ``text`` tag.
        """
        label_tag = self.soup.new_tag("navLabel")
        inner_text = self.soup.new_tag(self.text_tag_name)
        inner_text.string = value

        __ = self.tag.insert(0, label_tag)
        __ = label_tag.append(inner_text)
        return inner_text

    @override
    def _get_children_tags(self) -> list[bs4.Tag]:
        """Return the ``navPoint`` tags found below this tag."""
        # NOTE(review): select() matches descendants at any depth, whereas
        # _insert_tag only looks at direct children -- confirm that nested
        # navPoints are meant to be returned here.
        return self.tag.select("navPoint")

    @override
    def _insert_tag(self, position: int, tag: bs4.Tag):
        """Insert *tag* so it becomes the direct ``navPoint`` child at *position*.

        Positions at or past the current number of direct ``navPoint``
        children append *tag* at the end.
        """
        same_level = self.tag.find_all("navPoint", recursive=False)
        if position < len(same_level):
            __ = same_level[position].insert_before(tag)
            return

        __ = self.tag.append(tag)

267 

268 

class NCXNavMap(  # type: ignore[reportUnsafeMultipleInheritance]
    NavigationRoot[NCXNavPoint, TOCEntryData, NCXSoup],
    NCXNavPoint,
):
    """The navigation map in the NCX file."""

    tag_name: str = "navMap"
    child_class: type[NCXNavPoint] = NCXNavPoint  # type: ignore[reportIncompatibleVariableOverride]

    @override
    def _insert_self_in_soup(self):
        """Insert this element's tag into the ``<ncx>`` root.

        The tag is placed after the last ``docAuthor``; failing that, after
        the last ``docTitle``; failing that, after ``head``. If none of
        them exists, it becomes the first child of ``<ncx>``.

        Raises:
            EPUBError: If the document has no ``<ncx>`` element.
        """
        ncx = self.soup.ncx
        if not ncx:
            raise EPUBError("Invalid NCX file: couldn't find 'ncx' tag")

        # The NCX content model is head, docTitle, docAuthor*, navMap, ...
        # so anchors are tried latest-first to land after all of them.
        for tag_name in ("docAuthor", "docTitle", "head"):
            matches = ncx.select(tag_name)
            # select() returns an empty list when nothing matches, so the
            # list itself must be checked before indexing it.
            if matches:
                __ = matches[-1].insert_after(self.tag)
                return

        __ = ncx.insert(0, self.tag)

    @override
    def reset(self, entries: Sequence[TOCEntryData]):
        """Replace the current contents with one item per entry in *entries*.

        The existing tag is swapped for a freshly created one, the item list
        is emptied, and each entry is added with an href relative to
        ``self.base_filename`` (plus ``#id`` when the entry has an id).
        """
        new_tag = self._create_own_tag()
        __ = self.tag.replace_with(new_tag)
        self.tag: bs4.Tag = new_tag
        self._items: list[NCXNavPoint] = []

        for entry in entries:
            href = f"{get_relative_href(self.base_filename, entry.filename)}"
            if entry.id is not None:
                href += f"#{entry.id}"
            __ = self.add_item(text=entry.label, href=href)

    @override
    def __repr__(self) -> str:
        return f"{self.__class__.__name__}({len(self.items)} items)"

308 

309 

class NCXPageTarget(NCXNavPoint):
    """A page target in the NCX page list.

    Behaves like :class:`NCXNavPoint` except for the element name.
    """

    # Only the element name differs from the parent class.
    tag_name: str = "pageTarget"

314 

315 

class NCXPageList(  # type: ignore[reportUnsafeMultipleInheritance]
    NavigationRoot[NCXPageTarget, PageBreakData, NCXSoup],
    NCXPageTarget,
):
    """The page list in the NCX file, made of :class:`NCXPageTarget` items."""

    tag_name: str = "pageList"
    child_class: type[NCXPageTarget] = NCXPageTarget  # type: ignore[reportIncompatibleVariableOverride]

    @override
    def _insert_self_in_soup(self):
        """Insert this element's tag right after the ``navMap`` element.

        Raises:
            EPUBError: If the document has no ``navMap`` element (previously
                this surfaced as an ``AttributeError`` on ``None``).
        """
        nav_map = self.soup.navMap
        if nav_map is None:
            raise EPUBError("Invalid NCX file: couldn't find 'navMap' tag")
        __ = nav_map.insert_after(self.tag)

    @override
    def reset(self, entries: Sequence[PageBreakData]):
        """Replace the current contents with one item per page break.

        The existing tag is swapped for a freshly created one, the item list
        is emptied, and the page breaks are added in ascending ``page``
        order with hrefs of the form ``<relative file>#<id>``.
        """
        new_tag = self._create_own_tag()
        __ = self.tag.replace_with(new_tag)
        self.tag: bs4.Tag = new_tag
        self._items: list[NCXPageTarget] = []  # type: ignore[reportIncompatibleVariableOverride]

        for pagebreak in sorted(entries, key=attrgetter("page")):
            href = f"{get_relative_href(self.base_filename, pagebreak.filename)}#{pagebreak.id}"
            __ = self.add_item(text=pagebreak.label, href=href)

    @override
    def __repr__(self) -> str:
        return f"{self.__class__.__name__}({len(self.items)} items)"

    @property
    def largest_page_number(self) -> int | None:
        """The largest page number in the page list.

        Returns None when the list is empty. Items whose text does not
        parse as an integer count as 0.
        """
        if not self.items:
            return None

        return max(parse_int(item.text) or 0 for item in self.items)

349 

350 

class NCXNavList(NCXNavMap):
    """A navigation list in the NCX file."""

    tag_name: str = "navList"

    @override
    def _insert_self_in_soup(self):
        """Insert this element's tag into the ``<ncx>`` root.

        The tag is placed after the last ``pageList``; failing that, after
        the last ``navMap``. If neither exists, it becomes the first child
        of ``<ncx>``.

        Raises:
            EPUBError: If the document has no ``<ncx>`` element.
        """
        ncx = self.soup.ncx
        if not ncx:
            raise EPUBError("Invalid NCX file: couldn't find 'ncx' tag")

        # The NCX content model is ..., navMap, pageList?, navList*, so the
        # later anchor is tried first to keep the list after the page list.
        for tag_name in ("pageList", "navMap"):
            matches = ncx.select(tag_name)
            # select() returns an empty list when nothing matches, so the
            # list itself must be checked before indexing it.
            if matches:
                __ = matches[-1].insert_after(self.tag)
                return

        __ = ncx.insert(0, self.tag)