Coverage for src/epublib/nav/reset.py: 100%
116 statements
« prev ^ index » next coverage.py v7.10.7, created at 2025-10-07 15:07 -0300
« prev ^ index » next coverage.py v7.10.7, created at 2025-10-07 15:07 -0300
1from collections.abc import Iterable
2from typing import cast
4import bs4
6from epublib.exceptions import EPUBError
7from epublib.nav.resource import NavigationDocument
8from epublib.nav.util import LandmarkEntryData, PageBreakData, TOCEntryData, detect_page
9from epublib.resources import ContentDocument, Resource, XMLResource
10from epublib.types import BookProtocol
11from epublib.util import (
12 attr_to_str,
13 new_id_in_tag,
14)
def get_flat_toc_entries(
    resources: Iterable[Resource],
    targets_selector: str | None = None,
    include_filenames: bool = False,
) -> list[TOCEntryData]:
    """
    Build a flat (non-nested) list of TOC entries from the given
    resources.

    For each resource, in iteration order:

    - a file-level entry labelled with ``resource.get_title()`` is
      added when no selector is given or ``include_filenames`` is true;
    - when a selector is given and the resource is an ``XMLResource``,
      one entry is added per element matching the selector, labelled
      with the element's text.

    Matched elements lacking an ``id`` attribute get one assigned (via
    ``new_id_in_tag``), so the resource's soup may be modified.

    Args:
        resources: Resources to scan.
        targets_selector: CSS selector for the TOC targets. ``None``
            and the empty string both mean "no targets".
        include_filenames: Add the file-level entry even when a
            selector is given.

    Returns:
        The collected entries, in the order they were found.
    """
    entries: list[TOCEntryData] = []

    for resource in resources:
        # An empty selector is treated exactly like None. Previously ""
        # skipped both the file-level entry and the target scan, which
        # silently produced an empty TOC.
        if not targets_selector or include_filenames:
            label = resource.get_title()
            entries.append(TOCEntryData(resource.filename, label=label))

        if not targets_selector or not isinstance(resource, XMLResource):
            continue

        soup = cast(bs4.BeautifulSoup, resource.soup)
        for index, tag in enumerate(soup.select(targets_selector)):
            label = tag.get_text()
            identifier = attr_to_str(tag.get("id"))
            if not identifier:
                # The entry must be addressable, so give the element an id:
                # based on its text, or on its position when it has no text.
                base_id = label if label else f"toc-target-{index + 1}"
                identifier = tag["id"] = new_id_in_tag(base_id, soup)
            entries.append(
                TOCEntryData(
                    resource.filename,
                    label=label,
                    id=identifier,
                )
            )

    return entries
def get_nested_toc_entries(
    resources: Iterable[XMLResource],
    targets_selector: str,
    include_filenames: bool,
) -> list[TOCEntryData]:
    """
    Build a nested list of TOC entries from heading elements.

    ``targets_selector`` must be a comma-separated list made up only of
    ``h1`` .. ``h6``. Within each resource, a heading becomes a child of
    the closest preceding heading with a smaller level number; headings
    with no such predecessor become top-level entries. Nesting never
    spans resources: each resource starts with a fresh stack.

    When ``include_filenames`` is true, a file-level entry labelled with
    ``resource.get_title()`` is added at the top level before the
    resource's headings.

    Headings lacking an ``id`` attribute get one assigned (via
    ``new_id_in_tag``), so the resource's soup may be modified.
    """
    selected_names = [part.strip() for part in targets_selector.split(",")]
    # Precondition: the caller only routes pure heading selectors here.
    assert set(selected_names) <= {"h1", "h2", "h3", "h4", "h5", "h6"}
    # Heading level is the digit in the tag name ("h3" -> 3).
    level_of = dict((name, int(name[1])) for name in sorted(selected_names))

    toplevel: list[TOCEntryData] = []

    for resource in resources:
        # Chain of currently "open" headings as (level, entry) pairs,
        # outermost first.
        open_headings: list[tuple[int, TOCEntryData]] = []

        if include_filenames:
            file_label = resource.get_title()
            toplevel.append(TOCEntryData(resource.filename, label=file_label))

        matches = resource.soup.select(targets_selector)
        for position, heading in enumerate(matches, start=1):
            depth = level_of[heading.name]
            heading_id = attr_to_str(heading.get("id"))
            text = heading.get_text()
            if not heading_id:
                # Base the new id on the text, or on the heading's
                # position when it has no text.
                seed = text if text else f"heading-{position}"
                heading_id = heading["id"] = new_id_in_tag(seed, resource.soup)

            node = TOCEntryData(filename=resource.filename, label=text, id=heading_id)

            # Close every open heading at the same or a deeper level.
            while open_headings and open_headings[-1][0] >= depth:
                del open_headings[-1]

            siblings = open_headings[-1][1].children if open_headings else toplevel
            siblings.append(node)

            open_headings.append((depth, node))

    return toplevel
def reset_toc(
    book: BookProtocol,
    targets_selector: str | None = "h1, h2, h3, h4, h5, h6",
    include_filenames: bool = False,
    spine_only: bool = True,
    reset_ncx: bool | None = None,
    resource_class: type[Resource] = ContentDocument,
    title: str | None = None,
):
    """
    Rebuild the table of contents of the navigation document from
    targets detected in the book's resources. May replace any existing
    TOC.

    Args:
        book: The book whose navigation is rebuilt.
        targets_selector: CSS selector for TOC targets. A selector made
            up only of ``h1`` .. ``h6`` produces a nested TOC; anything
            else produces a flat one.
        include_filenames: Also add one entry per file.
        spine_only: Scan the spine items, in spine order, instead of
            the resources matching ``resource_class``.
        reset_ncx: ``True`` requires an NCX and resets it too; ``None``
            resets it only if the book has one; ``False`` leaves it
            untouched.
        resource_class: Resource type to scan when ``spine_only`` is
            false.
        title: Title for the new TOC. When ``None``, the title of the
            current TOC (if one can be read) is carried over.

    Raises:
        EPUBError: If ``reset_ncx`` is true but the book has no NCX.
    """
    if reset_ncx and not book.ncx:
        raise EPUBError.missing_ncx(book, "reset_toc")

    resources = (
        (book.resources[item] for item in book.spine.items)
        if spine_only
        else book.resources.filter(resource_class)
    )

    heading_names = {"h1", "h2", "h3", "h4", "h5", "h6"}
    if targets_selector and heading_names.issuperset(
        part.strip() for part in targets_selector.split(",")
    ):
        # Pure heading selectors carry hierarchy information, so nest.
        content_documents = (
            cast(ContentDocument[bs4.BeautifulSoup], candidate)
            for candidate in resources
            if isinstance(candidate, ContentDocument)
        )
        entries = get_nested_toc_entries(
            content_documents,
            targets_selector,
            include_filenames,
        )
    else:
        entries = get_flat_toc_entries(resources, targets_selector, include_filenames)

    if title is None:
        # Best effort: remember the current title before the TOC is
        # replaced; a failure to read it is deliberately ignored.
        try:
            title = book.nav.toc.title if book.nav.toc else None
        except EPUBError:
            pass

    book.nav.reset_toc(entries)

    ncx_requested = reset_ncx or reset_ncx is None
    if ncx_requested and book.ncx:
        book.ncx.nav_map.reset(entries)

    if title is not None:
        book.nav.toc.title = title
def reset_page_list(
    book: BookProtocol,
    id_format: str = "page_{page}",
    label_format: str = "{page}",
    pagebreak_selector: str = '[role="doc-pagebreak"], [epub|type="pagebreak"]',
    reset_ncx: bool | None = None,
):
    """
    Rebuild the page list of the navigation document from the
    pagebreak markers found in the spine documents. Will replace any
    existing page list.

    Args:
        book: The book whose page list is rebuilt.
        id_format: Format string (with a ``{page}`` field) used to
            build an id for markers that have none.
        label_format: Format string (with a ``{page}`` field) used to
            build each page list label.
        pagebreak_selector: CSS selector matching pagebreak markers.
        reset_ncx: ``True`` requires an NCX and resets its page list
            too; ``None`` resets it only if the book has one; ``False``
            leaves it untouched.

    Raises:
        EPUBError: If ``reset_ncx`` is true but the book has no NCX.
    """
    if reset_ncx and not book.ncx:
        raise EPUBError.missing_ncx(book, "reset_page_list")

    found: list[PageBreakData] = []

    for item in book.spine.items:
        document = book.documents[item]
        for marker in document.soup.select(pagebreak_selector):
            page = detect_page(marker)
            if page is None:
                # No page could be detected for this marker; skip it.
                continue

            if not marker.get("id"):
                # The page list links to the marker, so it needs an id.
                marker["id"] = new_id_in_tag(
                    id_format.format(page=page),
                    document.soup,
                )

            target = f"{document.filename}#{attr_to_str(marker['id'])}"
            found.append(
                PageBreakData(
                    filename=target,
                    page=page,
                    label=label_format.format(page=page),
                )
            )

    book.nav.reset_page_list(found)
    if book.ncx and (reset_ncx or reset_ncx is None):
        book.ncx.reset_page_list(found)
def create_page_list(
    book: BookProtocol,
    id_format: str = "page_{page}",
    label_format: str = "{page}",
    pagebreak_selector: str = '[role="doc-pagebreak"], [epub|type="pagebreak"]',
    reset_ncx: bool | None = None,
):
    """
    Create a page list in the navigation document from the pagebreak
    markers found in content documents.

    Same as ``reset_page_list`` (all arguments are forwarded to it),
    except that an already existing page list is an error instead of
    being replaced.

    Raises:
        EPUBError: If ``reset_ncx`` is true but the book has no NCX, or
            if the navigation document already has a page list.
    """
    ncx_missing = not book.ncx
    if reset_ncx and ncx_missing:
        raise EPUBError.missing_ncx(book, "create_page_list")

    already_there = book.nav.page_list is not None
    if already_there:
        raise EPUBError(
            "Can't create page list as it already exists. "
            f"Consider using '{book.__class__.__name__}.reset_page_list'"
        )

    return reset_page_list(
        book,
        id_format=id_format,
        label_format=label_format,
        pagebreak_selector=pagebreak_selector,
        reset_ncx=reset_ncx,
    )
def reset_landmarks(
    book: BookProtocol,
    include_toc: bool = True,
    targets_selector: str | None = None,
    default_epub_type: str = "chapter",
):
    """
    Reset the landmarks in the navigation document by detecting
    targets in content documents, and optionally including the TOC.
    Will replace existing landmarks.

    Args:
        book: The book whose landmarks are rebuilt.
        include_toc: Add a ``toc`` landmark pointing at the TOC of the
            navigation document, when there is one. The navigation
            document is then skipped when scanning for targets.
        targets_selector: CSS selector for additional landmark
            targets, searched in every ``XMLResource`` of the book.
            Targets whose text is blank are skipped.
        default_epub_type: Type given to every landmark created from a
            selector match.

    Raises:
        EPUBError: If the TOC is to be included but its title is
            missing or blank.
    """
    entries: list[LandmarkEntryData] = []

    if include_toc and book.nav and book.nav.toc:
        tag = book.nav.toc.tag
        if not book.nav.toc.title or not book.nav.toc.title.strip():
            raise EPUBError("Can't include TOC in landmarks as it has no title")

        if not tag.get("id"):
            # The landmark links to the TOC element, so it needs an id.
            tag["id"] = new_id_in_tag("toc", book.nav.soup)

        entries.append(
            LandmarkEntryData(
                f"{book.nav.filename}#{attr_to_str(tag['id'])}",
                book.nav.toc.title,
                "toc",
            )
        )

    if targets_selector:
        for resource in book.resources.filter(XMLResource):
            if include_toc and isinstance(resource, NavigationDocument):
                continue

            for tag in resource.soup.select(targets_selector):
                label = tag.get_text()
                if not label.strip():
                    continue

                identifier = attr_to_str(tag.get("id"))
                if not identifier:
                    # The label is guaranteed non-blank here, so it is
                    # always usable as the base for the new id; the old
                    # positional fallback could never be reached.
                    identifier = tag["id"] = new_id_in_tag(label, resource.soup)

                entries.append(
                    LandmarkEntryData(
                        f"{resource.filename}#{identifier}",
                        label,
                        default_epub_type,
                    )
                )

    book.nav.reset_landmarks(entries)
def create_landmarks(
    book: BookProtocol,
    include_toc: bool = True,
    targets_selector: str | None = None,
    default_epub_type: str = "chapter",
):
    """
    Create landmarks in the navigation document by detecting
    targets in content documents, and optionally including the TOC.
    Will raise error if landmarks already exist.

    Same as ``reset_landmarks`` (all arguments are forwarded to it),
    except that already existing landmarks are an error instead of
    being replaced.

    Args:
        book: The book that receives the landmarks.
        include_toc: Forwarded to ``reset_landmarks``.
        targets_selector: Forwarded to ``reset_landmarks``.
        default_epub_type: Forwarded to ``reset_landmarks``. The
            default matches that function's own default, so existing
            callers are unaffected.

    Raises:
        EPUBError: If the navigation document already has landmarks.
    """
    if book.nav.landmarks is not None:
        raise EPUBError(
            "Can't create landmarks as it already exists. "
            f"Consider using '{book.__class__.__name__}.reset_landmarks'"
        )

    return reset_landmarks(book, include_toc, targets_selector, default_epub_type)