Coverage for src/epublib/nav/reset.py: 99%

100 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-09-18 16:07 -0300

1from typing import cast 

2 

3import bs4 

4 

5from epublib.exceptions import EPUBError 

6from epublib.nav.resource import NavigationDocument 

7from epublib.nav.util import LandmarkEntryData, PageBreakData, TOCEntryData, detect_page 

8from epublib.resources import ContentDocument, Resource, XMLResource 

9from epublib.types import BookProtocol 

10from epublib.util import ( 

11 attr_to_str, 

12 get_content_document_title, 

13 new_id, 

14 new_id_in_tag, 

15 slugify, 

16 tag_ids, 

17) 

18 

19 

def reset_toc(
    book: BookProtocol,
    targets_selector: str | None = None,
    include_filenames: bool = False,
    spine_only: bool = False,  # ensures correct ordering
    reset_ncx: bool | None = None,
    resource_class: type[Resource] = ContentDocument,
):
    """
    Reset the table of contents in the navigation document by
    detecting targets in content documents. May replace any
    existing TOC.

    Args:
        book: The book whose navigation document (and optionally NCX)
            is rewritten.
        targets_selector: CSS selector handed to ``soup.select`` to find
            TOC targets inside each XML resource. When ``None``, one
            entry per resource is generated instead.
        include_filenames: When a selector is given, also emit the
            per-resource entry before that resource's targets.
        spine_only: Iterate over ``book.spine.items`` instead of
            ``book.resources.filter(resource_class)``.
        reset_ncx: ``True`` requires an NCX and resets it, ``None``
            resets the NCX only if the book has one, ``False`` leaves
            the NCX untouched.
        resource_class: Resource type used to filter ``book.resources``;
            not consulted when ``spine_only`` is true.

    Raises:
        EPUBError: If the book has no navigation document, or if
            ``reset_ncx`` is true and the book has no NCX.
    """

    if not book.nav:
        raise EPUBError("Can't reset TOC in EPUB without navigation document")

    if reset_ncx and not book.ncx:
        raise EPUBError.missing_ncx(book, "reset_toc")

    entries: list[TOCEntryData] = []
    if spine_only:
        resources = (book.resources[item] for item in book.spine.items)
    else:
        resources = book.resources.filter(resource_class)

    for resource in resources:
        # The navigation document never contributes targets or a title.
        is_target_document = (
            isinstance(resource, XMLResource) and resource is not book.nav
        )

        if targets_selector is None or include_filenames:
            if is_target_document:
                label = get_content_document_title(
                    cast(bs4.BeautifulSoup, resource.soup)
                )
            else:
                label = resource.filename
            entries.append(TOCEntryData(resource.filename, label=label))

        if targets_selector and is_target_document:
            soup = cast(bs4.BeautifulSoup, resource.soup)
            # Seed with the ids already present in the document so that a
            # generated id cannot duplicate the id of an unselected element
            # (same approach as reset_page_list and reset_landmarks).
            used_ids = tag_ids(soup)
            for index, tag in enumerate(soup.select(targets_selector)):
                label = tag.get_text()
                identifier = attr_to_str(tag.get("id"))
                if not identifier:
                    # Targets without an id get one derived from their text,
                    # or from their position when they have no text.
                    base_id = slugify(label) if label else f"toc-target-{index + 1}"
                    identifier = tag["id"] = new_id(base_id, used_ids, False)
                    used_ids.add(identifier)
                entries.append(
                    TOCEntryData(
                        resource.filename,
                        label=label,
                        id=identifier,
                    )
                )

    book.nav.reset_toc(entries)
    if book.ncx and (reset_ncx or reset_ncx is None):
        book.ncx.reset_nav_map(entries)

80 

81 

def create_toc(
    book: BookProtocol,
    targets_selector: str | None = None,
    include_filenames: bool = False,
    spine_only: bool = False,  # ensures correct ordering
    reset_ncx: bool | None = None,
    resource_class: type[Resource] = ContentDocument,
):
    """
    Create a new table of contents in the navigation document by
    detecting targets in content documents.

    Unlike ``reset_toc``, this refuses to touch an existing TOC. All
    arguments are forwarded unchanged to ``reset_toc``.

    Raises:
        EPUBError: If the book has no navigation document, if a TOC
            already exists, or if ``reset_ncx`` is true and the book
            has no NCX.
    """

    nav = book.nav
    if not nav:
        raise EPUBError("Can't create TOC in EPUB without navigation document")

    toc_exists = nav.toc is not None
    if toc_exists:
        message = (
            "Can't create TOC as it already exists. "
            f"Consider using '{book.__class__.__name__}.reset_toc'"
        )
        raise EPUBError(message)

    ncx_requested_but_missing = reset_ncx and not book.ncx
    if ncx_requested_but_missing:
        raise EPUBError.missing_ncx(book, "create_toc")

    # All preconditions hold; the actual work is shared with reset_toc.
    reset_toc(
        book,
        targets_selector=targets_selector,
        include_filenames=include_filenames,
        spine_only=spine_only,
        reset_ncx=reset_ncx,
        resource_class=resource_class,
    )

116 

117 

def reset_page_list(
    book: BookProtocol,
    id_format: str = "page_{page}",
    label_format: str = "{page}",
    pagebreak_selector: str = '[role="doc-pagebreak"], [epub|type="pagebreak"]',
    reset_ncx: bool | None = None,
):
    """
    Reset the page list in the navigation document by detecting
    pagebreaks in content documents. Will replace any existing page
    list.

    Args:
        book: The book whose navigation document (and optionally NCX)
            is rewritten.
        id_format: Template, formatted with ``page=<page>``, used as the
            base for the id given to pagebreak tags that have none.
        label_format: Template, formatted with ``page=<page>``, used as
            the label of each page list entry.
        pagebreak_selector: CSS selector handed to ``soup.select`` to
            find pagebreak tags in each content document.
        reset_ncx: ``True`` requires an NCX and resets it, ``None``
            resets the NCX only if the book has one, ``False`` leaves
            the NCX untouched.

    Raises:
        EPUBError: If ``reset_ncx`` is true and the book has no NCX, or
            if the book has no navigation document.
    """
    pagebreaks: list[PageBreakData] = []

    if reset_ncx and not book.ncx:
        # Report this function's own name (previously said "create_toc").
        raise EPUBError.missing_ncx(book, "reset_page_list")

    if not book.nav:
        raise EPUBError("Can't reset page list in EPUB without navigation document")

    for resource in book.resources.filter(ContentDocument):
        # Ids already used in this document; newly generated ids must avoid them.
        used_ids = tag_ids(resource.soup)
        for tag in resource.soup.select(pagebreak_selector):
            page = detect_page(tag)
            if page is None:
                # No page could be detected for this tag, so it is skipped.
                continue

            if not tag.get("id"):
                tag["id"] = new_id(id_format.format(page=page), used_ids, False)
                used_ids.add(attr_to_str(tag["id"]))

            pagebreaks.append(
                PageBreakData(
                    id=attr_to_str(tag["id"]),
                    filename=resource.filename,
                    page=page,
                    label=label_format.format(page=page),
                )
            )

    book.nav.reset_page_list(pagebreaks)
    if book.ncx and (reset_ncx or reset_ncx is None):
        book.ncx.reset_page_list(pagebreaks)

158 

159 

def create_page_list(
    book: BookProtocol,
    id_format: str = "page_{page}",
    label_format: str = "{page}",
    pagebreak_selector: str = '[role="doc-pagebreak"], [epub|type="pagebreak"]',
    reset_ncx: bool | None = None,
):
    """
    Create a new page list in the navigation document by detecting
    pagebreaks in content documents.

    Unlike ``reset_page_list``, this refuses to touch an existing page
    list. All arguments are forwarded unchanged to ``reset_page_list``,
    whose result is returned.

    Raises:
        EPUBError: If ``reset_ncx`` is true and the book has no NCX, if
            the book has no navigation document, or if a page list
            already exists.
    """

    ncx_requested_but_missing = reset_ncx and not book.ncx
    if ncx_requested_but_missing:
        raise EPUBError.missing_ncx(book, "create_page_list")

    nav = book.nav
    if not nav:
        raise EPUBError("Can't create page list in EPUB without navigation document")

    page_list_exists = nav.page_list is not None
    if page_list_exists:
        message = (
            "Can't create page list as it already exists. "
            f"Consider using '{book.__class__.__name__}.reset_page_list'"
        )
        raise EPUBError(message)

    # All preconditions hold; the actual work is shared with reset_page_list.
    return reset_page_list(
        book,
        id_format=id_format,
        label_format=label_format,
        pagebreak_selector=pagebreak_selector,
        reset_ncx=reset_ncx,
    )

192 

193 

def reset_landmarks(
    book: BookProtocol,
    include_toc: bool = True,
    targets_selector: str | None = None,
):
    """
    Rebuild the landmarks of the navigation document, replacing any
    landmarks it already has.

    Landmarks come from two sources: the TOC itself (when
    ``include_toc`` is true and the navigation document has a TOC) and
    every tag matched by ``targets_selector`` in the book's XML
    resources. Tags without an id are given one so they can be
    referenced.

    Raises:
        EPUBError: If the book has no navigation document.
    """

    nav = book.nav
    if not nav:
        raise EPUBError("Can't reset landmarks in EPUB without navigation document")

    landmarks: list[LandmarkEntryData] = []

    toc = nav.toc if include_toc else None
    if toc:
        toc_tag = toc.tag
        if not toc_tag.get("id"):
            # The landmark entry needs an id on the TOC tag to point at.
            toc_tag["id"] = new_id_in_tag("toc", nav.soup)

        toc_landmark = LandmarkEntryData(
            nav.filename,
            toc.text,
            attr_to_str(toc_tag["id"]),
            epub_type="toc",
        )
        landmarks.append(toc_landmark)

    # Without a selector there are no documents to scan.
    documents = book.resources.filter(XMLResource) if targets_selector else ()
    for document in documents:
        if include_toc and isinstance(document, NavigationDocument):
            continue

        taken_ids = tag_ids(document.soup)
        matches = document.soup.select(targets_selector)
        for position, target in enumerate(matches, start=1):
            text = target.get_text()
            target_id = attr_to_str(target.get("id"))
            if not target_id:
                # Derive an id from the text, or from the position when
                # the target has no text.
                seed = slugify(text) if text else f"toc-target-{position}"
                target_id = new_id(seed, taken_ids, False)
                target["id"] = target_id
                taken_ids.add(target_id)

            landmarks.append(
                LandmarkEntryData(document.filename, label=text, id=target_id)
            )

    nav.reset_landmarks(landmarks)

243 

244 

def create_landmarks(
    book: BookProtocol,
    include_toc: bool = True,
    targets_selector: str | None = None,
):
    """
    Create landmarks in the navigation document by detecting targets
    in content documents, optionally including the TOC.

    Unlike ``reset_landmarks``, this refuses to touch existing
    landmarks. All arguments are forwarded unchanged to
    ``reset_landmarks``, whose result is returned.

    Raises:
        EPUBError: If the book has no navigation document, or if
            landmarks already exist.
    """

    nav = book.nav
    if not nav:
        raise EPUBError("Can't create landmarks in EPUB without navigation document")

    landmarks_exist = nav.landmarks is not None
    if landmarks_exist:
        message = (
            "Can't create landmarks as it already exists. "
            f"Consider using '{book.__class__.__name__}.reset_landmarks'"
        )
        raise EPUBError(message)

    # All preconditions hold; the actual work is shared with reset_landmarks.
    return reset_landmarks(
        book,
        include_toc=include_toc,
        targets_selector=targets_selector,
    )