Coverage for src/epublib/ncx/__init__.py: 97%

241 statements  

« prev     ^ index     » next       coverage.py v7.10.7, created at 2025-10-06 17:21 -0300

1from abc import ABC, abstractmethod 

2from collections.abc import Sequence 

3from dataclasses import dataclass 

4from functools import partial 

5from operator import attrgetter 

6from pathlib import Path 

7from typing import Annotated, ClassVar, Literal, Protocol, Self, override 

8 

9import bs4 

10 

11from epublib.exceptions import EPUBError 

12from epublib.nav.util import PageBreakData, TOCEntryData 

13from epublib.soup import NCXSoup 

14from epublib.util import parse_int 

15from epublib.xml_element import ( 

16 HrefElement, 

17 HrefRecursiveElement, 

18 HrefRoot, 

19 ParentOfHref, 

20 SyncType, 

21 XMLAttribute, 

22 XMLElement, 

23 XMLParent, 

24) 

25 

26 

@dataclass(kw_only=True)
class NCXMeta(XMLElement[NCXSoup]):
    """One ``<meta>`` entry of the NCX ``<head>`` section."""

    # Name of the XML element this class represents.
    tag_name: ClassVar[str] = "meta"

    # Both fields are declared as XML attributes of the element.
    name: Annotated[str, XMLAttribute()]
    content: Annotated[str, XMLAttribute()]

    @property
    def pk(self) -> str:
        """Return the key of this entry, which is its ``name``."""
        return self.name

39 

40 

class NCXHead(XMLParent[NCXMeta, NCXSoup]):
    """Wrapper around the NCX ``<head>`` element and its meta items."""

    def __post_init__(self) -> None:
        # Refuse to wrap anything other than a <head> element.
        if self.tag.name != "head":
            raise EPUBError("NCXHead tag must be a <head> element")

        super().__post_init__()

    @override
    def add(self, name: str, content: str) -> NCXMeta:  # type: ignore[reportIncompatibleMethodOverride]
        """Create a meta item with the given name and content and add it."""
        return super().add(name=name, content=content)

    @property
    def uid(self) -> str:
        """
        Content of the 'dtb:uid' meta item (the publication's unique
        identifier).

        Raises:
            EPUBError: If the head has no 'dtb:uid' item.
        """
        try:
            entry = self["dtb:uid"]
        except KeyError as error:
            raise EPUBError("Expected 'dtb:uid' in NCX head") from error
        else:
            return entry.content

    @uid.setter
    def uid(self, value: str) -> None:
        entry = self.get("dtb:uid")
        if not entry:
            # No such meta item yet: create it.
            __ = self.add(name="dtb:uid", content=value)
            return
        entry.content = value

    @property
    def depth(self) -> int:
        """
        Content of the 'dtb:depth' meta item (depth of the navigation
        map structure), converted to ``int``.

        Raises:
            EPUBError: If the head has no 'dtb:depth' item.
        """
        try:
            entry = self["dtb:depth"]
        except KeyError as error:
            raise EPUBError("Expected 'dtb:depth' in NCX head") from error
        else:
            return int(entry.content)

    @depth.setter
    def depth(self, value: int) -> None:
        entry = self.get("dtb:depth")
        if not entry:
            __ = self.add(name="dtb:depth", content=str(value))
            return
        entry.content = str(value)

    @property
    def total_page_count(self) -> int | None:
        """
        Content of the 'dtb:totalPageCount' meta item as an ``int``.
        A stored value of 0 (no navigable pages) is reported as None.

        Raises:
            EPUBError: If the head has no 'dtb:totalPageCount' item.
        """
        try:
            entry = self["dtb:totalPageCount"]
        except KeyError as error:
            raise EPUBError("Expected 'dtb:totalPageCount' in NCX head") from error

        page_count = int(entry.content)
        # 0 is the stored representation of "no pages"; expose it as None.
        return page_count or None

    @total_page_count.setter
    def total_page_count(self, value: int | None) -> None:
        # None is stored as "0".
        serialized = str(0 if value is None else value)

        entry = self.get("dtb:totalPageCount")
        if not entry:
            __ = self.add(name="dtb:totalPageCount", content=serialized)
            return
        entry.content = serialized

    @property
    def max_page_number(self) -> int | None:
        """
        Content of the 'dtb:maxPageNumber' meta item as an ``int``.
        A stored value of 0 (no navigable pages) is reported as None.
        """
        # NOTE(review): unlike the other getters, a missing item is not
        # wrapped in EPUBError here; whatever the lookup raises propagates.
        page_number = int(self["dtb:maxPageNumber"].content)
        return page_number or None

    @max_page_number.setter
    def max_page_number(self, value: int | None) -> None:
        # None is stored as "0".
        serialized = str(0 if value is None else value)

        entry = self.get("dtb:maxPageNumber")
        if not entry:
            __ = self.add(name="dtb:maxPageNumber", content=serialized)
            return
        entry.content = serialized

138 

139 

@dataclass(kw_only=True)
class NCXDocData(XMLElement[NCXSoup], ABC):
    """
    Common abstract base of the NCX ``docTitle`` and ``docAuthor``
    elements.
    """

    # Text value, synchronised as a string through a "text" sub-element.
    text: Annotated[
        str,
        XMLAttribute(sync=SyncType.STRING, get="text", create="text"),
    ]
    # Optional ``id`` attribute of the element.
    id: Annotated[str | None, XMLAttribute()] = None

    @abstractmethod
    def insert_self_in_soup(self, soup: NCXSoup):
        """Place this element's tag inside ``soup``; defined by subclasses."""
        ...

151 

152 

class NCXAuthor(NCXDocData):
    """Authorship in the NCX file."""

    tag_name: ClassVar[str] = "docAuthor"

    @override
    def insert_self_in_soup(self, soup: NCXSoup):
        """
        Insert this docAuthor tag after the last existing docAuthor or
        docTitle tag of ``soup``.

        If the document has neither, the tag is inserted right after
        ``<head>`` instead of failing with a bare IndexError.

        Raises:
            EPUBError: If there is no docAuthor, docTitle or head tag to
                anchor the insertion on.
        """
        siblings = soup.find_all(["docAuthor", "docTitle"])
        # find_all returns matches in document order, so the last one is
        # the right anchor; fall back to <head> when there are none.
        previous_tag = siblings[-1] if siblings else soup.head
        if previous_tag is None:
            raise EPUBError("Invalid NCX file: couldn't find 'head' tag")

        __ = previous_tag.insert_after(self.tag)

162 

163 

class NCXTitle(NCXDocData):
    """The ``docTitle`` element of the NCX file."""

    tag_name: ClassVar[str] = "docTitle"

    @override
    def insert_self_in_soup(self, soup: NCXSoup):
        """Insert this element's tag immediately after ``soup.head``."""
        head_tag = soup.head
        __ = head_tag.insert_after(self.tag)

173 

174 

def create_ncx_text_tag(parent: str, soup: bs4.BeautifulSoup, tag: bs4.Tag) -> bs4.Tag:
    """
    Create a new ``<text>`` tag as the first child of ``tag``'s direct
    child named ``parent``, creating that child if it is missing.

    A newly created ``parent`` element is placed right after an existing
    direct ``navInfo`` child, unless ``parent`` is itself "navInfo" or no
    such child exists; in those cases it becomes the first child of
    ``tag``.

    Returns:
        The newly created ``<text>`` tag.
    """
    wrapper = tag.select_one(f"& > {parent}")

    if not wrapper:
        wrapper = soup.new_tag(parent)
        nav_info = tag.select_one("& > navInfo")
        if nav_info and parent != "navInfo":
            __ = nav_info.insert_after(wrapper)
        else:
            __ = tag.insert(0, wrapper)

    text_tag = soup.new_tag("text")
    __ = wrapper.insert(0, text_tag)
    return text_tag

189 

190 

@dataclass(kw_only=True)
class NCXHrefElement(HrefElement[NCXSoup], ABC):
    """
    An element in the NCX that has text and href, such as navPoint or
    pageTarget.
    """

    # Defined before the fields because the ``href`` annotation below
    # references it as its ``create`` callback.
    @staticmethod
    def create_href_tag(soup: bs4.BeautifulSoup, tag: bs4.Tag) -> bs4.Tag:
        """
        Create the ``<content>`` child of ``tag`` and return it.

        The new tag is inserted right after the last direct ``navLabel``
        child, or as the first child when there is no ``navLabel``.
        Anchoring on the navLabel itself (rather than on a fixed child
        index) keeps the order correct when whitespace nodes or a
        ``navInfo`` element precede the label.
        """
        new_tag = soup.new_tag("content")
        nav_labels = tag.select("& > navLabel")
        if nav_labels:
            __ = nav_labels[-1].insert_after(new_tag)
        else:
            __ = tag.insert(0, new_tag)

        return new_tag

    # ``id`` attribute of the element.
    id: Annotated[str, XMLAttribute()]
    # Stored in the ``src`` attribute of the ``<content>`` child.
    href: Annotated[str, XMLAttribute("src", get="content", create=create_href_tag)] = (
        ""
    )
    # Label text, stored as a string in ``navLabel > text``.
    text: Annotated[
        str,
        XMLAttribute(
            sync=SyncType.STRING,
            get=lambda tag: tag.select_one("& > navLabel > text"),
            create=partial(create_ncx_text_tag, "navLabel"),
        ),
    ]

220 

221 

@dataclass(kw_only=True)
class NCXWithInfo:
    """Mixin adding an optional ``info`` text to an NCX element."""

    # Optional informational text, kept as a string in the element's
    # direct ``navInfo > text`` descendant.
    info: Annotated[
        str | None,
        XMLAttribute(
            sync=SyncType.STRING,
            get=lambda tag: tag.select_one("& > navInfo > text"),
            create=partial(create_ncx_text_tag, "navInfo"),
        ),
    ] = None

234 

235 

@dataclass(kw_only=True)
class NCXNavPoint(
    NCXHrefElement,
    HrefRecursiveElement["NCXNavPoint", NCXSoup],
    NCXWithInfo,
):
    """A ``navPoint`` entry of the NCX table of contents."""

    tag_name: ClassVar[str] = "navPoint"

    # Stored in the ``playOrder`` XML attribute.
    play_order: Annotated[int | None, XMLAttribute("playOrder")] = None

    @override
    def insert(  # type: ignore[reportIncompatibleMethodOverride]
        self,
        position: int | None,
        text: str,
        filename: str | Path,
        id: str | None = None,
        info: str | None = None,
    ) -> "NCXNavPoint":
        """
        Insert a new navPoint at ``position``.

        When ``id`` is None, one is obtained from ``self.get_new_id``
        using the stringified ``filename``.
        """
        target = str(filename)
        new_id = id if id is not None else self.get_new_id(target)
        return super().insert(
            position,
            text=text,
            filename=target,
            id=new_id,
            info=info,
        )

    @override
    def add(  # type: ignore[reportIncompatibleMethodOverride]
        self,
        text: str,
        filename: str | Path,
        id: str | None = None,
        info: str | None = None,
    ) -> "NCXNavPoint":
        """
        Add a new navPoint.

        When ``id`` is None, one is obtained from ``self.get_new_id``
        using the stringified ``filename``.
        """
        target = str(filename)
        new_id = id if id is not None else self.get_new_id(target)
        return super().add(
            text=text,
            filename=target,
            id=new_id,
            info=info,
        )

    @override
    def add_after_self(  # type: ignore[reportIncompatibleMethodOverride]
        self,
        text: str,
        filename: str | Path,
        id: str | None = None,
        info: str | None = None,
    ) -> "NCXNavPoint":
        """
        Add a new navPoint after this one.

        When ``id`` is None, one is obtained from ``self.get_new_id``
        using the stringified ``filename``.
        """
        target = str(filename)
        new_id = id if id is not None else self.get_new_id(target)
        return super().add_after_self(
            text=text,
            filename=target,
            id=new_id,
            info=info,
        )

299 

300 

class NumberUpdating(Protocol):
    """Structural type for any object exposing ``update_numbers()``."""

    def update_numbers(self) -> None:
        """Refresh numbering; navigation containers call this after changes."""
        ...

303 

304 

@dataclass(kw_only=True)
class NCXNavMap(
    HrefRoot[NCXNavPoint, NCXSoup],
    XMLElement[NCXSoup],
    NCXWithInfo,
):
    """The navigation map in the NCX file."""

    # Optional label, stored as a string in ``navLabel > text``.
    text: Annotated[
        str | None,
        XMLAttribute(
            sync=SyncType.STRING,
            get=lambda tag: tag.select_one("& > navLabel > text"),
            create=partial(create_ncx_text_tag, "navLabel"),
        ),
    ] = None
    # Object whose ``update_numbers()`` is called after the map changes.
    parent: NumberUpdating

    tag_name: ClassVar[str] = "navMap"

    @classmethod
    @override
    def from_tag(  # type: ignore[reportIncompatibleMethodOverride]
        cls,
        soup: NCXSoup,
        tag: bs4.Tag,
        own_filename: str | Path,
        parent: NumberUpdating,
    ) -> Self:
        """Build the element from an existing tag; ``own_filename`` is stringified."""
        return super().from_tag(
            soup,
            tag,
            own_filename=str(own_filename),
            parent=parent,  # type: ignore[reportArgumentType]
        )

    def insert_self_in_soup(self):
        """
        Insert this element's tag into the ``<ncx>`` root.

        The tag goes after the last ``docAuthor``, else after the last
        ``docTitle``, else after the last ``head``; when none of those
        exists it becomes the first child of ``<ncx>``.

        Raises:
            EPUBError: If the soup has no ``ncx`` tag.
        """
        ncx = self.soup.ncx
        if not ncx:
            raise EPUBError("Invalid NCX file: couldn't find 'ncx' tag")

        # The NCX content model is head, docTitle, docAuthor*, navMap, ...
        # so look for the latest predecessor first. An empty find_all
        # result must be skipped rather than indexed.
        for tag_name in ("docAuthor", "docTitle", "head"):
            candidates = ncx.find_all(tag_name)
            if candidates:
                __ = candidates[-1].insert_after(self.tag)
                return

        __ = ncx.insert(0, self.tag)

    @override
    def insert(  # type: ignore[reportIncompatibleMethodOverride]
        self,
        position: int | None,
        text: str,
        filename: str | Path,
        id: str | None = None,
    ) -> NCXNavPoint:
        """
        Insert a new navPoint to the navMap at the given position.

        When ``id`` is None, one is obtained from ``self.get_new_id``
        using the final path component of ``filename``. The parent's
        ``update_numbers()`` is called afterwards.
        """
        item = super().insert(
            position,
            text=text,
            filename=str(filename),
            id=self.get_new_id(Path(filename).name) if id is None else id,
        )
        self.parent.update_numbers()
        return item

    @override
    def add(  # type: ignore[reportIncompatibleMethodOverride]
        self,
        text: str,
        filename: str | Path,
        id: str | None = None,
    ) -> NCXNavPoint:
        """
        Add a new navPoint to the navMap.

        When ``id`` is None, one is obtained from ``self.get_new_id``
        using the final path component of ``filename``. The parent's
        ``update_numbers()`` is called afterwards.
        """
        item = super().add(
            text=text,
            filename=str(filename),
            id=self.get_new_id(Path(filename).name) if id is None else id,
        )
        self.parent.update_numbers()
        return item

    def reset(self, entries: Sequence[TOCEntryData]):
        """
        Replace the whole content of this element with ``entries``.

        The current tag is swapped for a fresh empty one, the item list
        is cleared, and entries are added recursively with their
        children. Entries whose label is blank are skipped together with
        their children.
        """
        new_tag = self.soup.new_tag(self.tag_name)
        __ = self.tag.replace_with(new_tag)

        self.tag: bs4.Tag = new_tag
        self._items: list[NCXNavPoint] = []

        def add_items(item: NCXNavPoint | NCXNavMap, children: Sequence[TOCEntryData]):
            for entry in children:
                if not entry.label.strip():
                    continue
                filename = entry.filename
                if entry.id is not None:
                    # Point at the fragment inside the file.
                    filename += f"#{entry.id}"
                added_item = item.add(text=entry.label, filename=filename)
                add_items(added_item, entry.children)

        add_items(self, entries)
        self.parent.update_numbers()

408 

409 

@dataclass(kw_only=True)
class NCXPageTarget(NCXHrefElement):
    """A page target in the NCX page list."""

    # Page kind; inferred from ``text`` in ``__post_init__`` when None.
    type: Annotated[Literal["front", "normal", "special"] | None, XMLAttribute()] = None

    tag_name: ClassVar[str] = "pageTarget"

    def __post_init__(self):
        """
        Infer ``type`` from ``text`` when it was not given.

        - "normal": ``parse_int(text)`` yields a positive number;
        - "front": text is non-empty and made only of the letters
          ``ivxlcdm`` (case-insensitive);
        - "special": anything else, including empty text.
        """
        if self.type is None:
            page_number = parse_int(self.text)
            if page_number is not None and page_number > 0:
                self.type = "normal"
            # ``all()`` of an empty iterable is True, so empty text must
            # be excluded explicitly to avoid labelling it as "front".
            elif self.text and all(char in "ivxlcdm" for char in self.text.lower()):
                self.type = "front"
            else:
                self.type = "special"

        super().__post_init__()

429 

430 

@dataclass(kw_only=True)
class NCXPageList(
    ParentOfHref[NCXPageTarget, NCXSoup],
    XMLElement[NCXSoup],
    NCXWithInfo,
):
    """The page list (``pageList``) of the NCX file."""

    # Passed as ``own_filename`` to the page targets created in ``reset``.
    own_filename: str
    # Object whose ``update_numbers()`` is called after the list changes.
    parent: NumberUpdating

    tag_name: ClassVar[str] = "pageList"

    def insert_self_in_soup(self):
        """
        Insert this element's tag right after the soup's ``navMap``.

        Raises:
            EPUBError: If the soup has no ``navMap`` tag.
        """
        nav_map = self.soup.navMap
        if nav_map is None:
            raise EPUBError("Invalid NCX file: couldn't find 'navMap' tag")
        __ = nav_map.insert_after(self.tag)

    @property
    def largest_page_number(self) -> int | None:
        """
        The largest page number in the page list, or None when the list
        is empty. Items for which ``parse_int`` yields nothing count as 0.
        """
        if not self.items:
            return None

        return max(parse_int(item.text) or 0 for item in self.items)

    @classmethod
    @override
    def from_tag(  # type: ignore[reportIncompatibleMethodOverride]
        cls,
        soup: NCXSoup,
        tag: bs4.Tag,
        own_filename: str | Path,
        parent: NumberUpdating,
    ) -> Self:
        """Build the element from an existing tag; ``own_filename`` is stringified."""
        return super().from_tag(
            soup,
            tag,
            own_filename=str(own_filename),
            parent=parent,  # type: ignore[reportArgumentType]
        )

    @override
    def insert(  # type: ignore[reportIncompatibleMethodOverride]
        self,
        position: int,
        text: str,
        filename: str | Path,
        id: str | None = None,
        type: Literal["front", "normal", "special"] | None = None,
    ) -> NCXPageTarget:
        """
        Insert a new pageTarget in the pageList at the given position.

        When ``id`` is None, one is obtained from ``self.get_new_id``
        using the stringified ``filename``. ``type`` is forwarded to the
        new page target; None lets the page target infer it from
        ``text``. The parent's ``update_numbers()`` is called afterwards.
        """

        item = super().insert(
            position,
            text=text,
            filename=str(filename),
            id=self.get_new_id(str(filename)) if id is None else id,
            type=type,
        )
        self.parent.update_numbers()
        return item

    @override
    def add(  # type: ignore[reportIncompatibleMethodOverride]
        self,
        text: str,
        filename: str | Path,
        id: str | None = None,
        type: Literal["front", "normal", "special"] | None = None,
    ) -> NCXPageTarget:
        """
        Add a new pageTarget to the pageList.

        When ``id`` is None, one is obtained from ``self.get_new_id``
        using the stringified ``filename``. ``type`` is forwarded to the
        new page target; None lets the page target infer it from
        ``text``. The parent's ``update_numbers()`` is called afterwards.
        """

        item = super().add(
            text=text,
            filename=str(filename),
            id=self.get_new_id(str(filename)) if id is None else id,
            type=type,
        )
        self.parent.update_numbers()
        return item

    def reset(self, entries: Sequence[PageBreakData]):
        """
        Replace the whole content of the page list with ``entries``.

        The current tag is swapped for a fresh empty one, the item list
        is cleared, and one page target is added per entry, in ascending
        order of the entries' ``page`` attribute. Ids are generated as
        ``page-target-<n>`` with ``n`` starting at 1.
        """
        new_tag = self.soup.new_tag(self.tag_name)
        __ = self.tag.replace_with(new_tag)
        self.tag: bs4.Tag = new_tag
        self._items: list[NCXPageTarget] = []

        for index, pagebreak in enumerate(
            sorted(entries, key=attrgetter("page")), start=1
        ):
            __ = self.add_item(
                NCXPageTarget(
                    soup=self.soup,
                    filename=pagebreak.filename,
                    own_filename=self.own_filename,
                    id=f"page-target-{index}",
                    text=pagebreak.label,
                )
            )

        self.parent.update_numbers()

525 

526 

class NCXNavList(NCXNavMap):
    """A navigation list in the NCX file."""

    tag_name: ClassVar[str] = "navList"

    @override
    def insert_self_in_soup(self):
        """
        Insert this element's tag into the ``<ncx>`` root.

        The tag goes after the last ``pageList``, else after the last
        ``navMap``; when neither exists it becomes the first child of
        ``<ncx>``.

        Raises:
            EPUBError: If the soup has no ``ncx`` tag.
        """
        ncx = self.soup.ncx
        if not ncx:
            raise EPUBError("Invalid NCX file: couldn't find 'ncx' tag")

        # The NCX content model is ..., navMap, pageList?, navList*, so
        # look for the latest predecessor first. An empty find_all result
        # must be skipped rather than indexed.
        for tag_name in ("pageList", "navMap"):
            candidates = ncx.find_all(tag_name)
            if candidates:
                __ = candidates[-1].insert_after(self.tag)
                return

        __ = ncx.insert(0, self.tag)