Coverage for src/epublib/ncx/__init__.py: 97%
241 statements
« prev ^ index » next coverage.py v7.10.7, created at 2025-10-06 17:21 -0300
« prev ^ index » next coverage.py v7.10.7, created at 2025-10-06 17:21 -0300
1from abc import ABC, abstractmethod
2from collections.abc import Sequence
3from dataclasses import dataclass
4from functools import partial
5from operator import attrgetter
6from pathlib import Path
7from typing import Annotated, ClassVar, Literal, Protocol, Self, override
9import bs4
11from epublib.exceptions import EPUBError
12from epublib.nav.util import PageBreakData, TOCEntryData
13from epublib.soup import NCXSoup
14from epublib.util import parse_int
15from epublib.xml_element import (
16 HrefElement,
17 HrefRecursiveElement,
18 HrefRoot,
19 ParentOfHref,
20 SyncType,
21 XMLAttribute,
22 XMLElement,
23 XMLParent,
24)
@dataclass(kw_only=True)
class NCXMeta(XMLElement[NCXSoup]):
    """One ``<meta>`` entry of the NCX ``<head>`` section."""

    # Both fields are annotated with ``XMLAttribute()``.
    name: Annotated[str, XMLAttribute()]
    content: Annotated[str, XMLAttribute()]

    # Name of the XML element this class represents.
    tag_name: ClassVar[str] = "meta"

    @property
    def pk(self) -> str:
        """Return the key of this entry, which is its ``name``."""
        key = self.name
        return key
class NCXHead(XMLParent[NCXMeta, NCXSoup]):
    """The head section of the NCX file.

    The ``dtb:*`` meta entries are exposed as properties. Every getter
    raises ``EPUBError`` when its entry is missing; every setter updates
    the existing entry or adds a new one.
    """

    def __post_init__(self) -> None:
        """Check that the wrapped tag is a ``<head>`` element.

        Raises:
            EPUBError: If the tag has any other name.
        """
        if self.tag.name != "head":
            raise EPUBError("NCXHead tag must be a <head> element")

        super().__post_init__()

    @override
    def add(self, name: str, content: str) -> NCXMeta:  # type: ignore[reportIncompatibleMethodOverride]
        """Add a new meta item to the head section."""

        return super().add(name=name, content=content)

    def _required_content(self, name: str) -> str:
        """Return the content of the meta entry ``name``.

        Raises:
            EPUBError: If no entry with that name exists.
        """
        try:
            meta = self[name]
        except KeyError as error:
            raise EPUBError(f"Expected '{name}' in NCX head") from error
        return meta.content

    def _set_content(self, name: str, content: str) -> None:
        """Update the meta entry ``name``, adding it when absent."""
        meta = self.get(name)
        if meta:
            meta.content = content
        else:
            __ = self.add(name=name, content=content)

    @property
    def uid(self) -> str:
        """The unique identifier of the publication."""
        return self._required_content("dtb:uid")

    @uid.setter
    def uid(self, value: str) -> None:
        self._set_content("dtb:uid", value)

    @property
    def depth(self) -> int:
        """The depth of the navigation map structure."""
        return int(self._required_content("dtb:depth"))

    @depth.setter
    def depth(self, value: int) -> None:
        self._set_content("dtb:depth", str(value))

    @property
    def total_page_count(self) -> int | None:
        """
        Total page count of the publication. If there are no navigable
        pages (represented as 0), return None.
        """

        int_val = int(self._required_content("dtb:totalPageCount"))

        return None if int_val == 0 else int_val

    @total_page_count.setter
    def total_page_count(self, value: int | None) -> None:
        # ``None`` is stored as "0", mirroring what the getter reads back.
        self._set_content("dtb:totalPageCount", "0" if value is None else str(value))

    @property
    def max_page_number(self) -> int | None:
        """
        Largest value attribute on page targets in the page list. If
        there are no navigable pages (represented as 0), return None.

        Raises ``EPUBError`` when the entry is missing, like the other
        getters of this class, instead of a bare ``KeyError``.
        """

        int_val = int(self._required_content("dtb:maxPageNumber"))

        return None if int_val == 0 else int_val

    @max_page_number.setter
    def max_page_number(self, value: int | None) -> None:
        # ``None`` is stored as "0", mirroring what the getter reads back.
        self._set_content("dtb:maxPageNumber", "0" if value is None else str(value))
@dataclass(kw_only=True)
class NCXDocData(XMLElement[NCXSoup], ABC):
    """
    Shared abstract base for the NCX docTitle and docAuthor elements.

    Subclasses decide where their tag is placed in the document by
    implementing ``insert_self_in_soup``.
    """

    text: Annotated[
        str,
        XMLAttribute(sync=SyncType.STRING, get="text", create="text"),
    ]
    id: Annotated[str | None, XMLAttribute()] = None

    @abstractmethod
    def insert_self_in_soup(self, soup: NCXSoup):
        """Place this element's tag inside ``soup``."""
class NCXAuthor(NCXDocData):
    """Authorship in the NCX file."""

    tag_name: ClassVar[str] = "docAuthor"

    @override
    def insert_self_in_soup(self, soup: NCXSoup):
        """Insert the tag after the last ``docAuthor``/``docTitle`` element.

        When the document contains neither element yet, the tag is placed
        right after ``<head>`` instead of failing with ``IndexError`` on
        an empty search result.
        """
        existing = soup.find_all(["docAuthor", "docTitle"])
        anchor = existing[-1] if existing else soup.head
        __ = anchor.insert_after(self.tag)
class NCXTitle(NCXDocData):
    """The ``docTitle`` element of the NCX file."""

    tag_name: ClassVar[str] = "docTitle"

    @override
    def insert_self_in_soup(self, soup: NCXSoup):
        """Insert the tag immediately after the ``<head>`` of ``soup``."""
        __ = soup.head.insert_after(self.tag)
def create_ncx_text_tag(parent: str, soup: bs4.BeautifulSoup, tag: bs4.Tag) -> bs4.Tag:
    """Create a ``<text>`` tag inside the ``parent`` child of ``tag``.

    The direct child named ``parent`` is reused when ``tag`` already has
    one; otherwise it is created. A newly created child is inserted after
    an existing direct ``navInfo`` child, except when the child being
    created is itself ``navInfo`` or no ``navInfo`` exists, in which case
    it becomes the first child of ``tag``.

    Returns the new ``<text>`` tag, inserted as the first child of the
    ``parent`` element.
    """
    container = tag.select_one(f"& > {parent}")

    if not container:
        container = soup.new_tag(parent)
        nav_info = tag.select_one("& > navInfo")
        if parent == "navInfo" or not nav_info:
            __ = tag.insert(0, container)
        else:
            __ = nav_info.insert_after(container)

    text_tag = soup.new_tag("text")
    __ = container.insert(0, text_tag)
    return text_tag
@dataclass(kw_only=True)
class NCXHrefElement(HrefElement[NCXSoup], ABC):
    """
    Base for NCX elements carrying both a label text and an href, such
    as navPoint or pageTarget.
    """

    @staticmethod
    def create_href_tag(soup: bs4.BeautifulSoup, tag: bs4.Tag) -> bs4.Tag:
        """Create a ``<content>`` tag inside ``tag`` and return it.

        The new tag is inserted at index 1 when ``tag`` has a direct
        ``navLabel`` child, and at index 0 otherwise.
        """
        content_tag = soup.new_tag("content")
        index = 1 if tag.select_one("& > navLabel") else 0
        __ = tag.insert(index, content_tag)
        return content_tag

    id: Annotated[str, XMLAttribute()]
    # Bound to the ``src`` attribute, looked up via "content" and created
    # with ``create_href_tag`` above.
    href: Annotated[
        str,
        XMLAttribute("src", get="content", create=create_href_tag),
    ] = ""
    # Bound to the string of the ``navLabel > text`` descendant.
    text: Annotated[
        str,
        XMLAttribute(
            sync=SyncType.STRING,
            get=lambda tag: tag.select_one("& > navLabel > text"),
            create=partial(create_ncx_text_tag, "navLabel"),
        ),
    ]
@dataclass(kw_only=True)
class NCXWithInfo:
    """Mixin adding an optional ``info`` text to an NCX element."""

    # Bound to the string of the ``navInfo > text`` descendant; the tag is
    # created through ``create_ncx_text_tag`` with "navInfo" as parent.
    info: Annotated[
        str | None,
        XMLAttribute(
            sync=SyncType.STRING,
            get=lambda tag: tag.select_one("& > navInfo > text"),
            create=partial(create_ncx_text_tag, "navInfo"),
        ),
    ] = None
@dataclass(kw_only=True)
class NCXNavPoint(
    NCXHrefElement,
    HrefRecursiveElement["NCXNavPoint", NCXSoup],
    NCXWithInfo,
):
    """A navigation point of the NCX table of contents."""

    play_order: Annotated[int | None, XMLAttribute("playOrder")] = None

    tag_name: ClassVar[str] = "navPoint"

    @override
    def insert(  # type: ignore[reportIncompatibleMethodOverride]
        self,
        position: int | None,
        text: str,
        filename: str | Path,
        id: str | None = None,
        info: str | None = None,
    ) -> "NCXNavPoint":
        """Insert a new child navPoint at ``position``.

        When ``id`` is omitted, one is obtained from ``get_new_id`` using
        the stringified ``filename``.
        """
        target = str(filename)
        new_id = id if id is not None else self.get_new_id(target)
        return super().insert(
            position,
            text=text,
            filename=target,
            id=new_id,
            info=info,
        )

    @override
    def add(  # type: ignore[reportIncompatibleMethodOverride]
        self,
        text: str,
        filename: str | Path,
        id: str | None = None,
        info: str | None = None,
    ) -> "NCXNavPoint":
        """Add a new child navPoint.

        When ``id`` is omitted, one is obtained from ``get_new_id`` using
        the stringified ``filename``.
        """
        target = str(filename)
        new_id = id if id is not None else self.get_new_id(target)
        return super().add(
            text=text,
            filename=target,
            id=new_id,
            info=info,
        )

    @override
    def add_after_self(  # type: ignore[reportIncompatibleMethodOverride]
        self,
        text: str,
        filename: str | Path,
        id: str | None = None,
        info: str | None = None,
    ) -> "NCXNavPoint":
        """Add a new navPoint after this one.

        When ``id`` is omitted, one is obtained from ``get_new_id`` using
        the stringified ``filename``.
        """
        target = str(filename)
        new_id = id if id is not None else self.get_new_id(target)
        return super().add_after_self(
            text=text,
            filename=target,
            id=new_id,
            info=info,
        )
class NumberUpdating(Protocol):
    """Structural type for objects offering ``update_numbers()``."""

    def update_numbers(self) -> None:
        """Hook invoked by the navigation containers after they change."""
@dataclass(kw_only=True)
class NCXNavMap(
    HrefRoot[NCXNavPoint, NCXSoup],
    XMLElement[NCXSoup],
    NCXWithInfo,
):
    """The navigation map in the NCX file.

    After every structural change (``insert``, ``add``, ``reset``) the
    map calls ``parent.update_numbers()``.
    """

    text: Annotated[
        str | None,
        XMLAttribute(
            sync=SyncType.STRING,
            get=lambda tag: tag.select_one("& > navLabel > text"),
            create=partial(create_ncx_text_tag, "navLabel"),
        ),
    ] = None
    parent: NumberUpdating

    tag_name: ClassVar[str] = "navMap"

    @classmethod
    @override
    def from_tag(  # type: ignore[reportIncompatibleMethodOverride]
        cls,
        soup: NCXSoup,
        tag: bs4.Tag,
        own_filename: str | Path,
        parent: NumberUpdating,
    ) -> Self:
        """Build the map from an existing ``tag``; ``own_filename`` is stringified."""
        return super().from_tag(
            soup,
            tag,
            own_filename=str(own_filename),
            parent=parent,  # type: ignore[reportArgumentType]
        )

    def insert_self_in_soup(self):
        """Insert this element's tag into the ``<ncx>`` root.

        The tag goes after the last ``docAuthor``, else after the last
        ``docTitle``, else after ``head``; when none of them exists it
        becomes the first child of ``<ncx>``.

        Raises:
            EPUBError: If the document has no ``<ncx>`` element.
        """
        ncx = self.soup.ncx
        if not ncx:
            raise EPUBError("Invalid NCX file: couldn't find 'ncx' tag")

        # The NCX content model is head, docTitle, docAuthor*, navMap, so
        # probe the latest-occurring predecessor first. Indexing [-1] only
        # after checking for matches avoids IndexError on absent elements.
        for anchor_name in ("docAuthor", "docTitle", "head"):
            matches = ncx.find_all(anchor_name)
            if matches:
                __ = matches[-1].insert_after(self.tag)
                return

        __ = ncx.insert(0, self.tag)

    @override
    def insert(  # type: ignore[reportIncompatibleMethodOverride]
        self,
        position: int | None,
        text: str,
        filename: str | Path,
        id: str | None = None,
    ) -> NCXNavPoint:
        """Insert a new navPoint to the navMap at the given position.

        When ``id`` is omitted, one is obtained from ``get_new_id`` using
        the final path component of ``filename``.
        """
        item = super().insert(
            position,
            text=text,
            filename=str(filename),
            id=self.get_new_id(Path(filename).name) if id is None else id,
        )
        self.parent.update_numbers()
        return item

    @override
    def add(  # type: ignore[reportIncompatibleMethodOverride]
        self,
        text: str,
        filename: str | Path,
        id: str | None = None,
    ) -> NCXNavPoint:
        """Add a new navPoint to the navMap.

        When ``id`` is omitted, one is obtained from ``get_new_id`` using
        the final path component of ``filename``.
        """

        item = super().add(
            text=text,
            filename=str(filename),
            id=self.get_new_id(Path(filename).name) if id is None else id,
        )
        self.parent.update_numbers()
        return item

    def reset(self, entries: Sequence[TOCEntryData]):
        """Replace the whole map with a tree built from ``entries``.

        The current tag is swapped for a fresh empty one, then every entry
        (and, recursively, its children) is added. Entries whose label is
        blank are skipped together with their children.
        """
        new_tag = self.soup.new_tag(self.tag_name)
        __ = self.tag.replace_with(new_tag)

        self.tag: bs4.Tag = new_tag
        self._items: list[NCXNavPoint] = []

        def add_items(item: NCXNavPoint | NCXNavMap, children: Sequence[TOCEntryData]):
            for entry in children:
                if not entry.label.strip():
                    continue
                filename = entry.filename
                if entry.id is not None:
                    # Point at the fragment inside the file.
                    filename += f"#{entry.id}"
                added_item = item.add(text=entry.label, filename=filename)
                add_items(added_item, entry.children)

        add_items(self, entries)
        self.parent.update_numbers()
@dataclass(kw_only=True)
class NCXPageTarget(NCXHrefElement):
    """A page target in the NCX page list.

    When ``type`` is not given it is inferred from ``text``:

    * ``"normal"`` when ``parse_int`` yields a positive number;
    * ``"front"`` when the text is non-empty and made only of the
      letters ``ivxlcdm`` (case-insensitive);
    * ``"special"`` otherwise, including for an empty text.
    """

    type: Annotated[Literal["front", "normal", "special"] | None, XMLAttribute()] = None

    tag_name: ClassVar[str] = "pageTarget"

    def __post_init__(self):
        if self.type is None:
            label = self.text
            page_number = parse_int(label)
            if page_number is not None and page_number > 0:
                self.type = "normal"
            # ``all()`` is vacuously true for an empty string, so require a
            # non-empty label before treating it as a roman numeral.
            elif label and all(char in "ivxlcdm" for char in label.lower()):
                self.type = "front"
            else:
                self.type = "special"

        super().__post_init__()
@dataclass(kw_only=True)
class NCXPageList(
    ParentOfHref[NCXPageTarget, NCXSoup],
    XMLElement[NCXSoup],
    NCXWithInfo,
):
    """The page list in the NCX file.

    After every structural change (``insert``, ``add``, ``reset``) the
    list calls ``parent.update_numbers()``.
    """

    own_filename: str
    parent: NumberUpdating

    tag_name: ClassVar[str] = "pageList"

    def insert_self_in_soup(self):
        """Insert this element's tag right after the ``navMap`` element."""
        __ = self.soup.navMap.insert_after(self.tag)

    @property
    def largest_page_number(self) -> int | None:
        """The largest page number in the page list.

        Items whose text ``parse_int`` cannot turn into a number count as
        0. Returns None when the list is empty.
        """
        if not self.items:
            return None

        return max(parse_int(item.text) or 0 for item in self.items)

    @classmethod
    @override
    def from_tag(  # type: ignore[reportIncompatibleMethodOverride]
        cls,
        soup: NCXSoup,
        tag: bs4.Tag,
        own_filename: str | Path,
        parent: NumberUpdating,
    ) -> Self:
        """Build the list from an existing ``tag``; ``own_filename`` is stringified."""
        return super().from_tag(
            soup,
            tag,
            own_filename=str(own_filename),
            parent=parent,  # type: ignore[reportArgumentType]
        )

    def insert(  # type: ignore[reportIncompatibleMethodOverride]
        self,
        position: int,
        text: str,
        filename: str | Path,
        id: str | None = None,
        type: Literal["front", "normal", "special"] | None = None,
    ) -> NCXPageTarget:
        """Insert a new pageTarget into the pageList at ``position``.

        ``type`` is forwarded to the new page target; when left as None
        the target infers it from ``text``. When ``id`` is omitted, one is
        obtained from ``get_new_id`` using the stringified ``filename``.
        """

        item = super().insert(
            position,
            text=text,
            filename=str(filename),
            id=self.get_new_id(str(filename)) if id is None else id,
            # Previously accepted but dropped, so an explicit type was lost.
            type=type,
        )
        self.parent.update_numbers()
        return item

    def add(  # type: ignore[reportIncompatibleMethodOverride]
        self,
        text: str,
        filename: str | Path,
        id: str | None = None,
        type: Literal["front", "normal", "special"] | None = None,
    ) -> NCXPageTarget:
        """Add a new pageTarget to the pageList.

        ``type`` is forwarded to the new page target; when left as None
        the target infers it from ``text``. When ``id`` is omitted, one is
        obtained from ``get_new_id`` using the stringified ``filename``.
        """

        item = super().add(
            text=text,
            filename=str(filename),
            id=self.get_new_id(str(filename)) if id is None else id,
            # Previously accepted but dropped, so an explicit type was lost.
            type=type,
        )
        self.parent.update_numbers()
        return item

    def reset(self, entries: Sequence[PageBreakData]):
        """Replace the whole list with targets built from ``entries``.

        The current tag is swapped for a fresh empty one; entries are
        sorted by their ``page`` attribute and receive ids of the form
        ``page-target-<n>``, numbered from 1 in that order.
        """
        new_tag = self.soup.new_tag(self.tag_name)
        __ = self.tag.replace_with(new_tag)
        self.tag: bs4.Tag = new_tag
        self._items: list[NCXPageTarget] = []

        for index, pagebreak in enumerate(
            sorted(entries, key=attrgetter("page")), start=1
        ):
            __ = self.add_item(
                NCXPageTarget(
                    soup=self.soup,
                    filename=pagebreak.filename,
                    own_filename=self.own_filename,
                    id=f"page-target-{index}",
                    text=pagebreak.label,
                )
            )

        self.parent.update_numbers()
class NCXNavList(NCXNavMap):
    """A navigation list in the NCX file."""

    tag_name: ClassVar[str] = "navList"

    @override
    def insert_self_in_soup(self):
        """Insert this element's tag into the ``<ncx>`` root.

        The tag goes after the last ``pageList``, else after the last
        ``navMap``; when neither exists it becomes the first child of
        ``<ncx>``.

        Raises:
            EPUBError: If the document has no ``<ncx>`` element.
        """
        ncx = self.soup.ncx
        if not ncx:
            raise EPUBError("Invalid NCX file: couldn't find 'ncx' tag")

        # The NCX content model is navMap, pageList?, navList*, so probe
        # pageList before navMap. Indexing [-1] only after checking for
        # matches avoids IndexError on absent elements.
        for anchor_name in ("pageList", "navMap"):
            matches = ncx.find_all(anchor_name)
            if matches:
                __ = matches[-1].insert_after(self.tag)
                return

        __ = ncx.insert(0, self.tag)