Coverage for src/epublib/nav/reset.py: 99%
100 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-18 16:07 -0300
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-18 16:07 -0300
1from typing import cast
3import bs4
5from epublib.exceptions import EPUBError
6from epublib.nav.resource import NavigationDocument
7from epublib.nav.util import LandmarkEntryData, PageBreakData, TOCEntryData, detect_page
8from epublib.resources import ContentDocument, Resource, XMLResource
9from epublib.types import BookProtocol
10from epublib.util import (
11 attr_to_str,
12 get_content_document_title,
13 new_id,
14 new_id_in_tag,
15 slugify,
16 tag_ids,
17)
def reset_toc(
    book: BookProtocol,
    targets_selector: str | None = None,
    include_filenames: bool = False,
    spine_only: bool = False,  # ensures correct ordering
    reset_ncx: bool | None = None,
    resource_class: type[Resource] = ContentDocument,
):
    """
    Reset the table of contents in the navigation document by
    detecting targets in content documents. May replace any
    existing TOC.

    Args:
        book: The book whose navigation document (and optionally NCX)
            is rebuilt.
        targets_selector: CSS selector of the elements that become TOC
            entries. When ``None``, one entry per resource is created
            instead.
        include_filenames: Also create one entry per resource when a
            ``targets_selector`` is given.
        spine_only: Iterate over ``book.spine.items`` instead of
            filtering ``book.resources`` by ``resource_class``.
        reset_ncx: ``True`` requires an NCX and resets its nav map,
            ``False`` leaves the NCX untouched, ``None`` resets it only
            when the book has one.
        resource_class: Resource type passed to
            ``book.resources.filter`` when ``spine_only`` is false.

    Raises:
        EPUBError: If the book has no navigation document, or if
            ``reset_ncx`` is true and the book has no NCX.
    """

    if not book.nav:
        raise EPUBError("Can't reset TOC in EPUB without navigation document")

    if reset_ncx and not book.ncx:
        raise EPUBError.missing_ncx(book, "reset_toc")

    entries: list[TOCEntryData] = []
    if spine_only:
        resources = (book.resources[item] for item in book.spine.items)
    else:
        resources = book.resources.filter(resource_class)

    for resource in resources:
        # One entry per file: always when no selector is given, and on
        # request when a selector is given.
        if targets_selector is None or include_filenames:
            if isinstance(resource, XMLResource) and resource is not book.nav:
                label = get_content_document_title(
                    cast(bs4.BeautifulSoup, resource.soup)
                )
            else:
                label = resource.filename
            entries.append(TOCEntryData(resource.filename, label=label))

        # One entry per element matched by the selector; the navigation
        # document itself is never scanned for targets.
        if (
            targets_selector
            and isinstance(resource, XMLResource)
            and resource is not book.nav
        ):
            soup = cast(bs4.BeautifulSoup, resource.soup)
            # Seed with the ids tag_ids() reports for this document (as
            # reset_page_list and reset_landmarks do) instead of an empty
            # set, so new_id() is told about ids that are already taken
            # by elements the selector does not match.
            used_ids = tag_ids(resource.soup)
            for index, tag in enumerate(soup.select(targets_selector)):
                label = tag.get_text()
                identifier = attr_to_str(tag.get("id"))
                if not identifier:
                    # The target needs an id to be linkable: derive one
                    # from its text, or from its position if it has none.
                    base_id = slugify(label) if label else f"toc-target-{index + 1}"
                    identifier = tag["id"] = new_id(base_id, used_ids, False)
                used_ids.add(identifier)
                entries.append(
                    TOCEntryData(
                        resource.filename,
                        label=label,
                        id=identifier,
                    )
                )

    book.nav.reset_toc(entries)
    # reset_ncx=None means "keep the NCX in sync if there is one".
    if book.ncx and (reset_ncx or reset_ncx is None):
        book.ncx.reset_nav_map(entries)
def create_toc(
    book: BookProtocol,
    targets_selector: str | None = None,
    include_filenames: bool = False,
    spine_only: bool = False,  # ensures correct ordering
    reset_ncx: bool | None = None,
    resource_class: type[Resource] = ContentDocument,
):
    """
    Create a new table of contents in the navigation document by
    detecting targets in content documents.

    Performs the precondition checks below and then delegates to
    ``reset_toc`` with the same arguments.

    Raises:
        EPUBError: If the book has no navigation document, if the
            navigation document already has a TOC, or if ``reset_ncx``
            is true and the book has no NCX.
    """

    nav = book.nav
    if not nav:
        raise EPUBError("Can't create TOC in EPUB without navigation document")

    # Creating must never overwrite: refuse when a TOC is already there.
    if nav.toc is not None:
        owner = book.__class__.__name__
        raise EPUBError(
            "Can't create TOC as it already exists. "
            f"Consider using '{owner}.reset_toc'"
        )

    # Checked here as well so the error names this function.
    if reset_ncx and not book.ncx:
        raise EPUBError.missing_ncx(book, "create_toc")

    reset_toc(
        book,
        targets_selector=targets_selector,
        include_filenames=include_filenames,
        spine_only=spine_only,
        reset_ncx=reset_ncx,
        resource_class=resource_class,
    )
def reset_page_list(
    book: BookProtocol,
    id_format: str = "page_{page}",
    label_format: str = "{page}",
    pagebreak_selector: str = '[role="doc-pagebreak"], [epub|type="pagebreak"]',
    reset_ncx: bool | None = None,
):
    """
    Reset the page list in the navigation document by detecting
    pagebreaks in content documents. Will replace any existing page
    list.

    Args:
        book: The book whose navigation document (and optionally NCX)
            is rebuilt.
        id_format: Format string, given ``page``, used as the base id
            for pagebreak elements that have no ``id`` attribute.
        label_format: Format string, given ``page``, used as the label
            of each page list entry.
        pagebreak_selector: CSS selector matching pagebreak elements.
        reset_ncx: ``True`` requires an NCX and resets its page list,
            ``False`` leaves the NCX untouched, ``None`` resets it only
            when the book has one.

    Raises:
        EPUBError: If ``reset_ncx`` is true and the book has no NCX, or
            if the book has no navigation document.
    """
    pagebreaks: list[PageBreakData] = []

    if reset_ncx and not book.ncx:
        # Report this function's own name (was "create_toc" by mistake).
        raise EPUBError.missing_ncx(book, "reset_page_list")

    if not book.nav:
        raise EPUBError("Can't reset page list in EPUB without navigation document")

    for resource in book.resources.filter(ContentDocument):
        used_ids = tag_ids(resource.soup)
        for tag in resource.soup.select(pagebreak_selector):
            page = detect_page(tag)
            if page is None:
                # No page could be detected for this element: skip it.
                continue

            if not tag.get("id"):
                # The entry links to the element, so it needs an id.
                tag["id"] = new_id(id_format.format(page=page), used_ids, False)
            used_ids.add(attr_to_str(tag["id"]))

            pagebreaks.append(
                PageBreakData(
                    id=attr_to_str(tag["id"]),
                    filename=resource.filename,
                    page=page,
                    label=label_format.format(page=page),
                )
            )

    book.nav.reset_page_list(pagebreaks)
    # reset_ncx=None means "keep the NCX in sync if there is one".
    if book.ncx and (reset_ncx or reset_ncx is None):
        book.ncx.reset_page_list(pagebreaks)
def create_page_list(
    book: BookProtocol,
    id_format: str = "page_{page}",
    label_format: str = "{page}",
    pagebreak_selector: str = '[role="doc-pagebreak"], [epub|type="pagebreak"]',
    reset_ncx: bool | None = None,
):
    """
    Create a new page list in the navigation document by detecting
    pagebreaks in content documents.

    Performs the precondition checks below and then delegates to
    ``reset_page_list`` with the same arguments, returning its result.

    Raises:
        EPUBError: If ``reset_ncx`` is true and the book has no NCX, if
            the book has no navigation document, or if the navigation
            document already has a page list.
    """

    # Checked here as well so the error names this function.
    if reset_ncx and not book.ncx:
        raise EPUBError.missing_ncx(book, "create_page_list")

    nav = book.nav
    if not nav:
        raise EPUBError("Can't create page list in EPUB without navigation document")

    # Creating must never overwrite: refuse when a page list is already there.
    if nav.page_list is not None:
        owner = book.__class__.__name__
        raise EPUBError(
            "Can't create page list as it already exists. "
            f"Consider using '{owner}.reset_page_list'"
        )

    return reset_page_list(
        book,
        id_format=id_format,
        label_format=label_format,
        pagebreak_selector=pagebreak_selector,
        reset_ncx=reset_ncx,
    )
def reset_landmarks(
    book: BookProtocol,
    include_toc: bool = True,
    targets_selector: str | None = None,
):
    """
    Rebuild the landmarks of the navigation document, replacing any
    existing ones.

    The new landmarks are, in order: an entry pointing at the TOC
    (when ``include_toc`` is true and the navigation document has
    one), followed by one entry per element matched by
    ``targets_selector`` in the book's XML resources.

    Raises:
        EPUBError: If the book has no navigation document.
    """

    nav = book.nav
    if not nav:
        raise EPUBError("Can't reset landmarks in EPUB without navigation document")

    landmarks: list[LandmarkEntryData] = []

    toc = nav.toc if include_toc else None
    if toc:
        toc_tag = toc.tag
        # The landmark links to the TOC element, so it needs an id.
        if not toc_tag.get("id"):
            toc_tag["id"] = new_id_in_tag("toc", nav.soup)

        toc_entry = LandmarkEntryData(
            nav.filename,
            toc.text,
            attr_to_str(toc_tag["id"]),
            epub_type="toc",
        )
        landmarks.append(toc_entry)

    if targets_selector:
        for resource in book.resources.filter(XMLResource):
            # Navigation documents are skipped only when the TOC landmark
            # was requested.
            if include_toc and isinstance(resource, NavigationDocument):
                continue

            known_ids = tag_ids(resource.soup)
            matches = resource.soup.select(targets_selector)
            for position, target in enumerate(matches, start=1):
                text = target.get_text()
                target_id = attr_to_str(target.get("id"))
                if not target_id:
                    # Derive an id from the text, or from the 1-based
                    # position when the element has no text.
                    seed = slugify(text) if text else f"toc-target-{position}"
                    target_id = new_id(seed, known_ids, False)
                    target["id"] = target_id
                known_ids.add(target_id)
                landmarks.append(
                    LandmarkEntryData(
                        resource.filename,
                        label=text,
                        id=target_id,
                    )
                )

    nav.reset_landmarks(landmarks)
def create_landmarks(
    book: BookProtocol,
    include_toc: bool = True,
    targets_selector: str | None = None,
):
    """
    Create landmarks in the navigation document by detecting targets
    in content documents, optionally including the TOC.

    Performs the precondition checks below and then delegates to
    ``reset_landmarks`` with the same arguments, returning its result.

    Raises:
        EPUBError: If the book has no navigation document, or if the
            navigation document already has landmarks.
    """

    nav = book.nav
    if not nav:
        raise EPUBError("Can't create landmarks in EPUB without navigation document")

    # Creating must never overwrite: refuse when landmarks are already there.
    if nav.landmarks is not None:
        owner = book.__class__.__name__
        raise EPUBError(
            "Can't create landmarks as it already exists. "
            f"Consider using '{owner}.reset_landmarks'"
        )

    return reset_landmarks(
        book,
        include_toc=include_toc,
        targets_selector=targets_selector,
    )