Coverage for src/epublib/nav/reset.py: 100%

116 statements  

« prev     ^ index     » next       coverage.py v7.10.7, created at 2025-10-07 15:07 -0300

1from collections.abc import Iterable 

2from typing import cast 

3 

4import bs4 

5 

6from epublib.exceptions import EPUBError 

7from epublib.nav.resource import NavigationDocument 

8from epublib.nav.util import LandmarkEntryData, PageBreakData, TOCEntryData, detect_page 

9from epublib.resources import ContentDocument, Resource, XMLResource 

10from epublib.types import BookProtocol 

11from epublib.util import ( 

12 attr_to_str, 

13 new_id_in_tag, 

14) 

15 

16 

def get_flat_toc_entries(
    resources: Iterable[Resource],
    targets_selector: str | None = None,
    include_filenames: bool = False,
) -> list[TOCEntryData]:
    """
    Build a flat (non-nested) list of TOC entries from resources.

    For each resource, a file-level entry labelled with
    ``resource.get_title()`` is added when ``targets_selector`` is
    ``None`` or ``include_filenames`` is true. When a selector is
    given and the resource is an ``XMLResource``, one entry is added
    per element matching the selector, labelled with the element's
    text, in the order the elements are returned by ``select``.

    Matching elements that have no ``id`` get one assigned in place,
    so calling this function may modify the resources' soups.
    """
    entries: list[TOCEntryData] = []

    for resource in resources:
        if targets_selector is None or include_filenames:
            label = resource.get_title()
            entries.append(TOCEntryData(resource.filename, label=label))

        if not targets_selector or not isinstance(resource, XMLResource):
            continue

        soup = cast(bs4.BeautifulSoup, resource.soup)
        for position, tag in enumerate(soup.select(targets_selector), start=1):
            label = tag.get_text()
            identifier = attr_to_str(tag.get("id"))
            if not identifier:
                # A whitespace-only label carries no usable text for an
                # id, so it gets the positional fallback just like an
                # empty one.
                base_id = label if label.strip() else f"toc-target-{position}"
                identifier = tag["id"] = new_id_in_tag(base_id, soup)

            entries.append(
                TOCEntryData(
                    resource.filename,
                    label=label,
                    id=identifier,
                )
            )

    return entries

45 

46 

def get_nested_toc_entries(
    resources: Iterable[XMLResource],
    targets_selector: str,
    include_filenames: bool,
) -> list[TOCEntryData]:
    """
    Build a hierarchical list of TOC entries from heading elements.

    ``targets_selector`` must be a comma-separated list made up only
    of ``h1`` to ``h6``. Within each resource, a heading becomes a
    child of the closest preceding heading with a smaller level
    number; a heading with no such predecessor becomes a top-level
    entry. Nesting never crosses resource boundaries.

    When ``include_filenames`` is true, a top-level entry labelled
    with ``resource.get_title()`` is added before the headings of
    each resource. Headings that have no ``id`` get one assigned in
    place, so calling this function may modify the resources' soups.

    Raises ``AssertionError`` if the selector contains anything other
    than ``h1`` to ``h6``.
    """
    allowed = {"h1", "h2", "h3", "h4", "h5", "h6"}
    selected = {name.strip() for name in targets_selector.split(",")}
    unsupported = selected - allowed
    if unsupported:
        # Raised explicitly instead of through an ``assert`` statement so
        # the validation is not stripped under ``python -O``. The
        # exception type is kept for backward compatibility.
        raise AssertionError(
            "targets_selector may only contain h1 to h6, "
            f"got unsupported: {sorted(unsupported)}"
        )

    # Heading tag name -> numeric level (the digit after the "h").
    levels = {name: int(name[1]) for name in selected}

    entries: list[TOCEntryData] = []

    for resource in resources:
        # (level, entry) pairs for the chain of headings that can still
        # receive children; reset for every resource.
        stack: list[tuple[int, TOCEntryData]] = []

        if include_filenames:
            label = resource.get_title()
            entries.append(TOCEntryData(resource.filename, label=label))

        for count, tag in enumerate(resource.soup.select(targets_selector), start=1):
            level = levels[tag.name]
            identifier = attr_to_str(tag.get("id"))
            label = tag.get_text()
            if not identifier:
                # A whitespace-only label carries no usable text for an
                # id, so it gets the positional fallback just like an
                # empty one.
                base_id = label if label.strip() else f"heading-{count}"
                identifier = tag["id"] = new_id_in_tag(base_id, resource.soup)

            entry = TOCEntryData(filename=resource.filename, label=label, id=identifier)

            # Drop headings at the same or a deeper level: they cannot be
            # the parent of this one.
            while stack and stack[-1][0] >= level:
                __ = stack.pop()

            if stack:
                stack[-1][1].children.append(entry)
            else:
                entries.append(entry)

            stack.append((level, entry))

    return entries

94 

95 

def reset_toc(
    book: BookProtocol,
    targets_selector: str | None = "h1, h2, h3, h4, h5, h6",
    include_filenames: bool = False,
    spine_only: bool = True,
    reset_ncx: bool | None = None,
    resource_class: type[Resource] = ContentDocument,
    title: str | None = None,
):
    """
    Rebuild the table of contents of the navigation document from
    targets found in the book's resources.

    Resources come from the spine when ``spine_only`` is true,
    otherwise from ``book.resources.filter(resource_class)``. If
    ``targets_selector`` consists solely of ``h1`` to ``h6``, a nested
    TOC is built from content documents; otherwise a flat one is
    built.

    The NCX navigation map is reset as well when the book has an NCX
    and ``reset_ncx`` is true or ``None``. If ``reset_ncx`` is true
    and the book has no NCX, the error produced by
    ``EPUBError.missing_ncx`` is raised.

    When ``title`` is ``None``, the title of the existing TOC (if one
    can be read) is carried over to the new TOC.
    """
    if reset_ncx and not book.ncx:
        raise EPUBError.missing_ncx(book, "reset_toc")

    resources = (
        (book.resources[item] for item in book.spine.items)
        if spine_only
        else book.resources.filter(resource_class)
    )

    heading_names = {"h1", "h2", "h3", "h4", "h5", "h6"}

    if (
        targets_selector
        and {part.strip() for part in targets_selector.split(",")} <= heading_names
    ):
        # Heading-only selectors yield a hierarchy; only content
        # documents take part in it.
        content_documents = (
            cast(ContentDocument[bs4.BeautifulSoup], res)
            for res in resources
            if isinstance(res, ContentDocument)
        )
        entries = get_nested_toc_entries(
            content_documents,
            targets_selector,
            include_filenames,
        )
    else:
        entries = get_flat_toc_entries(resources, targets_selector, include_filenames)

    if title is None:
        # Best effort: remember the current TOC title so it survives
        # the reset below.
        try:
            title = book.nav.toc.title if book.nav.toc else None
        except EPUBError:
            pass

    book.nav.reset_toc(entries)

    ncx_wanted = reset_ncx is None or reset_ncx
    if ncx_wanted and book.ncx:
        book.ncx.nav_map.reset(entries)

    if title is not None:
        book.nav.toc.title = title

152 

153 

def reset_page_list(
    book: BookProtocol,
    id_format: str = "page_{page}",
    label_format: str = "{page}",
    pagebreak_selector: str = '[role="doc-pagebreak"], [epub|type="pagebreak"]',
    reset_ncx: bool | None = None,
):
    """
    Rebuild the page list of the navigation document from the
    pagebreak elements found in the spine documents, replacing any
    page list already present.

    Elements matching ``pagebreak_selector`` for which ``detect_page``
    returns ``None`` are ignored. Pagebreak elements without an ``id``
    get one derived from ``id_format``; entry labels are derived from
    ``label_format``. Both formats receive the detected page as
    ``page``.

    The NCX page list is reset as well when the book has an NCX and
    ``reset_ncx`` is true or ``None``. If ``reset_ncx`` is true and
    the book has no NCX, the error produced by
    ``EPUBError.missing_ncx`` is raised.
    """
    collected: list[PageBreakData] = []

    if reset_ncx and not book.ncx:
        raise EPUBError.missing_ncx(book, "reset_page_list")

    for item in book.spine.items:
        document = book.documents[item]

        for tag in document.soup.select(pagebreak_selector):
            page = detect_page(tag)
            if page is None:
                continue

            if not tag.get("id"):
                tag["id"] = new_id_in_tag(
                    id_format.format(page=page),
                    document.soup,
                )

            collected.append(
                PageBreakData(
                    filename=f"{document.filename}#{attr_to_str(tag['id'])}",
                    page=page,
                    label=label_format.format(page=page),
                )
            )

    book.nav.reset_page_list(collected)

    ncx_wanted = reset_ncx is None or reset_ncx
    if book.ncx and ncx_wanted:
        book.ncx.reset_page_list(collected)

194 

195 

def create_page_list(
    book: BookProtocol,
    id_format: str = "page_{page}",
    label_format: str = "{page}",
    pagebreak_selector: str = '[role="doc-pagebreak"], [epub|type="pagebreak"]',
    reset_ncx: bool | None = None,
):
    """
    Create a page list in the navigation document from the pagebreak
    elements found in content documents.

    Unlike ``reset_page_list``, this refuses to overwrite: an
    ``EPUBError`` is raised when the navigation document already has
    a page list. If ``reset_ncx`` is true and the book has no NCX,
    the error produced by ``EPUBError.missing_ncx`` is raised first.
    All arguments are handed to ``reset_page_list`` unchanged.
    """
    if reset_ncx and not book.ncx:
        raise EPUBError.missing_ncx(book, "create_page_list")

    already_present = book.nav.page_list is not None
    if already_present:
        message = (
            "Can't create page list as it already exists. "
            f"Consider using '{book.__class__.__name__}.reset_page_list'"
        )
        raise EPUBError(message)

    return reset_page_list(
        book,
        id_format=id_format,
        label_format=label_format,
        pagebreak_selector=pagebreak_selector,
        reset_ncx=reset_ncx,
    )

225 

226 

def reset_landmarks(
    book: BookProtocol,
    include_toc: bool = True,
    targets_selector: str | None = None,
    default_epub_type: str = "chapter",
):
    """
    Reset the landmarks in the navigation document by detecting
    targets in content documents, and optionally including the TOC.
    Will replace existing landmarks.

    When ``include_toc`` is true and the navigation document has a
    TOC, the first landmark points at the TOC element (which gets an
    ``id`` if it lacks one) with epub type ``"toc"``. An ``EPUBError``
    is raised if that TOC has no non-blank title.

    When ``targets_selector`` is given, every matching element with
    non-blank text in the book's XML resources becomes a landmark of
    type ``default_epub_type``, labelled with the element's text.
    Elements without an ``id`` get one assigned in place. The
    navigation document itself is skipped when ``include_toc`` is
    true.
    """

    entries: list[LandmarkEntryData] = []
    if include_toc and book.nav and book.nav.toc:
        tag = book.nav.toc.tag
        if not book.nav.toc.title or not book.nav.toc.title.strip():
            raise EPUBError("Can't include TOC in landmarks as it has no title")

        if not tag.get("id"):
            tag["id"] = new_id_in_tag("toc", book.nav.soup)

        entries.append(
            LandmarkEntryData(
                f"{book.nav.filename}#{attr_to_str(tag['id'])}",
                book.nav.toc.title,
                "toc",
            )
        )

    if targets_selector:
        for resource in book.resources.filter(XMLResource):
            if include_toc and isinstance(resource, NavigationDocument):
                continue

            for tag in resource.soup.select(targets_selector):
                label = tag.get_text()

                # Elements without visible text cannot be labelled, so
                # they are left out of the landmarks.
                if not label.strip():
                    continue

                identifier = attr_to_str(tag.get("id"))
                if not identifier:
                    # The label is known to be non-blank here, so it is
                    # always usable as the base for the new id.
                    identifier = tag["id"] = new_id_in_tag(label, resource.soup)

                entries.append(
                    LandmarkEntryData(
                        f"{resource.filename}#{identifier}",
                        label,
                        default_epub_type,
                    )
                )
    book.nav.reset_landmarks(entries)

280 

281 

def create_landmarks(
    book: BookProtocol,
    include_toc: bool = True,
    targets_selector: str | None = None,
    default_epub_type: str = "chapter",
):
    """
    Create landmarks in the navigation document by detecting
    targets in content documents, and optionally including the TOC.
    Will raise error if landmarks already exist.

    ``include_toc``, ``targets_selector`` and ``default_epub_type``
    are forwarded to ``reset_landmarks``; ``default_epub_type`` is
    accepted here so both functions offer the same options.

    Raises ``EPUBError`` when the navigation document already has
    landmarks.
    """

    if book.nav.landmarks is not None:
        raise EPUBError(
            "Can't create landmarks as it already exists. "
            f"Consider using '{book.__class__.__name__}.reset_landmarks'"
        )

    return reset_landmarks(book, include_toc, targets_selector, default_epub_type)

298 

299 return reset_landmarks(book, include_toc, targets_selector)