Coverage for src/epublib/ncx/resource.py: 100%

171 statements  

« prev     ^ index     » next       coverage.py v7.10.7, created at 2025-10-07 14:58 -0300

1from collections.abc import Iterable, Sequence 

2from pathlib import Path 

3from typing import IO, override 

4from zipfile import ZipInfo 

5 

6from epublib.exceptions import EPUBError 

7from epublib.media_type import MediaType 

8from epublib.nav import NavItem, NavRoot 

9from epublib.nav.resource import NavigationDocument 

10from epublib.nav.util import PageBreakData, TOCEntryData 

11from epublib.ncx import ( 

12 NCXAuthor, 

13 NCXHead, 

14 NCXNavList, 

15 NCXNavMap, 

16 NCXNavPoint, 

17 NCXPageList, 

18 NCXTitle, 

19) 

20from epublib.package.metadata import BookMetadata 

21from epublib.resources import PublicationResource, XMLResource 

22from epublib.soup import NCXSoup 

23 

24 

25class NCXFile( # type: ignore[reportUnsafeMultipleInheritance] 

26 PublicationResource, 

27 XMLResource[NCXSoup], 

28): 

29 """ 

30 The NCX document of the EPUB file, sometimes known as the 'toc.ncx' file. 

31 This is used in EPUB2 files for navigation, and was largely superseded by 

32 the EPUB Navigation Document in EPUB3. Support for it in EPUB3 is optional. 

33 """ 

34 

35 soup_class: type[NCXSoup] = NCXSoup 

36 

def __init__(
    self,
    file: IO[bytes] | bytes,
    info: ZipInfo | str | Path,
    media_type: MediaType | str = MediaType.NCX,
) -> None:
    """
    Create the NCX resource and start with every cached wrapper unset.

    The arguments are forwarded unchanged to the parent initializer.
    Each private attribute below backs the property of the same name
    and stays ``None`` until that property is first read.
    """
    super().__init__(file, info, media_type)

    # Document-level wrappers.
    self._head: NCXHead | None = None
    self._title: NCXTitle | None = None
    self._authors: list[NCXAuthor] | None = None

    # Navigation structure wrappers.
    self._nav_map: NCXNavMap | None = None
    self._page_list: NCXPageList | None = None
    self._nav_lists: Sequence[NCXNavList] | None = None

50 

@property
def head(self) -> NCXHead:
    """
    The wrapper around the soup's ``head`` element.

    Built from ``self.soup.head`` on first access and cached afterwards.
    """
    cached = self._head
    if cached is None:
        cached = self._head = NCXHead(self.soup, self.soup.head)
    return cached

56 

@property
def title(self) -> NCXTitle:
    """
    The wrapper around the soup's ``docTitle`` element.

    Built via ``NCXTitle.from_tag`` on first access and cached afterwards.
    """
    cached = self._title
    if cached is None:
        cached = self._title = NCXTitle.from_tag(self.soup, self.soup.docTitle)
    return cached

62 

@property
def authors(self) -> Sequence[NCXAuthor]:
    """
    The authors declared through ``docAuthor`` elements.

    The wrappers are collected from the soup on first access and kept in
    a private list; callers receive a fresh tuple copy on every read.
    """
    if self._authors is None:
        self._authors = [
            NCXAuthor.from_tag(self.soup, author_tag)
            for author_tag in self.soup.find_all("docAuthor")
        ]

    return tuple(self._authors)

72 

@property
def nav_map(self) -> NCXNavMap:
    """
    The wrapper around the ``navMap`` element.

    On first access an existing ``navMap`` tag is wrapped; when the soup
    has none, a new nav map is created, cached and inserted into the soup.
    """
    if self._nav_map is not None:
        return self._nav_map

    existing_tag = self.soup.select_one("navMap")
    if existing_tag:
        self._nav_map = NCXNavMap.from_tag(
            soup=self.soup,
            tag=existing_tag,
            own_filename=self.filename,
            parent=self,
        )
        return self._nav_map

    # No navMap in the document yet: cache the new one first, then insert it.
    self._nav_map = NCXNavMap(
        soup=self.soup,
        own_filename=self.filename,
        parent=self,
    )
    self._nav_map.insert_self_in_soup()
    return self._nav_map

92 

@property
def page_list(self) -> NCXPageList | None:
    """
    The wrapper around the ``pageList`` element, or ``None`` when the
    soup has no such element.

    A found element is wrapped and cached; nothing is created or cached
    when the element is missing.
    """
    if self._page_list is not None:
        return self._page_list

    page_list_tag = self.soup.select_one("pageList")
    if page_list_tag:
        self._page_list = NCXPageList.from_tag(
            soup=self.soup,
            tag=page_list_tag,
            own_filename=self.filename,
            parent=self,
        )
    return self._page_list

105 

@property
def nav_lists(self) -> Sequence[NCXNavList]:
    """
    The wrappers around every ``navList`` element in the soup.

    Collected into a tuple on first access and cached afterwards.
    """
    if self._nav_lists is not None:
        return self._nav_lists

    self._nav_lists = tuple(
        NCXNavList.from_tag(
            soup=self.soup,
            tag=nav_list_tag,
            own_filename=self.filename,
            parent=self,
        )
        for nav_list_tag in self.soup.find_all("navList")
    )
    return self._nav_lists

120 

def remove(self, filename: str):
    """
    Remove references to ``filename`` from the nav map, the page list
    (when there is one) and every nav list, then refresh the numbers
    via ``update_numbers``.
    """
    # Todo: remove references to images and audio as well
    self.nav_map.remove_nodes(filename)

    pages = self.page_list
    if pages:
        pages.remove_all(filename)

    for extra_list in self.nav_lists:
        extra_list.remove_nodes(filename)

    self.update_numbers()

130 

def get_author(self, name: str) -> NCXAuthor | None:
    """
    Return the first author whose ``text`` equals ``name``, or ``None``
    when no author matches.
    """
    matches = (candidate for candidate in self.authors if candidate.text == name)
    return next(matches, None)

137 

def add_author(self, name: str) -> NCXAuthor:
    """
    Create an author with the given name, insert it into the soup and
    return it.

    The new author is appended to the cached author list only when that
    list has already been built.
    """
    new_author = NCXAuthor(soup=self.soup, text=name)

    cached_authors = self._authors
    if cached_authors is not None:
        cached_authors.append(new_author)

    new_author.insert_self_in_soup(self.soup)
    return new_author

145 

def remove_author(self, author: str | NCXAuthor) -> NCXAuthor | None:
    """
    Remove an author, given either the wrapper itself or its name.

    A name is resolved with ``get_author``; ``None`` is returned when no
    author has that name. Otherwise the author is dropped from the cached
    list (when one has been built), its tag is decomposed, and the
    removed wrapper is returned.
    """
    if isinstance(author, NCXAuthor):
        target = author
    else:
        found = self.get_author(author)
        if found is None:
            return None
        target = found

    if self._authors is not None:
        self._authors.remove(target)
    target.tag.decompose()

    return target

158 

def add_nav_list(self, items: Iterable[TOCEntryData]) -> NCXNavList:
    """
    Build a new nav list from ``items``, insert it into the soup and
    return it.

    One entry is added per item, using the item's ``filename`` and
    ``label``. The entries are added before the list is inserted.
    """
    new_list = NCXNavList(
        self.soup,
        own_filename=self.filename,
        parent=self,
    )

    for toc_entry in items:
        _added = new_list.add(filename=toc_entry.filename, text=toc_entry.label)

    new_list.insert_self_in_soup()
    return new_list

171 

def reset_page_list(self, entries: list[PageBreakData]):
    """
    Replace the contents of the page list with ``entries``.

    When there is no page list yet, a new one is created, cached and
    inserted into the soup before it is reset.
    """
    if not self.page_list:
        created = NCXPageList(
            self.soup,
            own_filename=self.filename,
            parent=self,
        )
        self._page_list = created
        created.insert_self_in_soup()

    current = self.page_list
    assert current
    current.reset(entries)

183 

def update_total_page_count(self) -> None:
    """
    Set the head's total page count to the number of page list items.

    Raises:
        EPUBError: if there is no page list.
    """
    page_list = self.page_list
    if not page_list:
        raise EPUBError("No page list to update total page count from")

    # The guard above guarantees a page list here, so no fallback is needed.
    self.head.total_page_count = len(page_list.items)

191 

def update_depth(self):
    """Store the nav map's maximum depth in the head."""
    deepest = self.nav_map.max_depth()
    self.head.depth = deepest

194 

def update_max_page_number(self) -> None:
    """
    Set the head's max page number to the page list's largest page number.

    Raises:
        EPUBError: if there is no page list.
    """
    page_list = self.page_list
    if not page_list:
        raise EPUBError("No page list to update max page number from")

    # The guard above guarantees a page list here, so no fallback is needed.
    self.head.max_page_number = page_list.largest_page_number

201 

def _update_play_order_recursive(
    self,
    nav_point: NCXNavPoint | NCXNavMap,
    start: int,
) -> int:
    """
    Number the items below ``nav_point`` depth-first, starting at ``start``.

    Each item receives its play order before its own children are
    numbered. Returns the next unused play order.
    """
    next_order = start
    for child in nav_point.items:
        child.play_order = next_order
        next_order = self._update_play_order_recursive(child, next_order + 1)

    return next_order

212 

def update_play_order(self) -> None:
    """Renumber the play order of every nav map item, starting at 1."""
    root = self.nav_map
    _next_unused = self._update_play_order_recursive(root, 1)

215 

def update_numbers(self):
    """
    Refresh the numbers the head and nav map of the NCX file require:
    - max depth;
    - play order;
    - max page number (only when there is a page list);
    - total page count (only when there is a page list).
    """
    self.update_depth()
    self.update_play_order()

    # The page-related numbers only apply when there is a page list.
    if not self.page_list:
        return

    self.update_max_page_number()
    self.update_total_page_count()

231 

def sync_head(self, metadata: BookMetadata) -> NCXHead:
    """
    Sync metadata from the package document metadata to the NCX
    document, erasing any existing head > meta items. Should be used
    after populating the navMap and pageList (if there is one), to
    get an accurate page and depth count.

    A new ``head`` element replaces the current one in the soup. Its uid
    is taken from ``metadata.identifier`` when that is set, and its
    numbers are then filled in by ``update_numbers``.

    Returns the new head wrapper, which is also cached on this resource.
    """
    head = NCXHead(
        soup=self.soup,
        tag=self.soup.new_tag("head"),
    )

    if metadata.identifier:
        head.uid = metadata.identifier

    # Seed the placeholders on the *new* head. Writing them to `self.head`
    # would only touch the element that is replaced just below.
    head.depth = 0
    head.total_page_count = None
    head.max_page_number = None

    __ = self.soup.head.replace_with(head.tag)
    self._head = head
    # Overwrites the placeholders with values from the nav map / page list.
    self.update_numbers()

    return head

256 

def sync_toc(self, nav: NavigationDocument):
    """
    Sync the NCX navMap to match the given TOC structure, erasing
    any existing navMap items.

    ``nav.toc`` is walked depth-first and one nav point is added per TOC
    item, mirroring the nesting. Each new nav point's tag gets an ``id``
    of the form ``navPoint<count>``.

    Returns the rebuilt nav map.

    Raises:
        EPUBError: if the walker is entered more often than the safety
            bound computed from the TOC element allows.
    """

    # Start from an empty nav map.
    self.nav_map.reset([])

    # `count` goes up by one each time the walker is entered. It starts at
    # 1 and the first entry is for the root, so the first id handed out is
    # "navPoint2".
    count = 1
    # Safety bound: twice the number of results of find_all(True) on the TOC
    # tag (presumably every descendant tag — confirm against the soup API).
    max_count = len(list(nav.toc.tag.find_all(True))) * 2

    def recurse_items(
        nav_point: NCXNavPoint | NCXNavMap,
        toc_item: NavItem | NavRoot,
    ):
        nonlocal count
        count += 1

        if count > max_count:
            raise EPUBError("Infinite recursion detected in TOC structure")

        for sub_toc_item in toc_item.items:
            sub_nav_point = nav_point.add(sub_toc_item.text, sub_toc_item.filename)
            # `count` at this point already includes every walker entry made
            # for earlier siblings and their subtrees, so ids do not repeat.
            sub_nav_point.tag["id"] = f"navPoint{count}"
            recurse_items(sub_nav_point, sub_toc_item)

    recurse_items(self.nav_map, nav.toc)
    return self.nav_map

285 

def sync_page_list(self, nav: NavigationDocument):
    """
    Rebuild the NCX page list from the navigation document's page list.

    The NCX page list is first reset to empty (and created if missing),
    then one entry is added per source item using its ``text`` and
    ``filename``.

    Raises:
        EPUBError: if the navigation document has no page list.
    """
    source_pages = nav.page_list
    if not source_pages:
        raise EPUBError("No page list in navigation document to sync from")

    self.reset_page_list([])
    assert self.page_list

    for page_item in source_pages.items:
        _added = self.page_list.add(page_item.text, page_item.filename)

295 

def on_soup_change(self) -> None:
    """
    Forget every cached wrapper so that each one is rebuilt from the
    current soup the next time its property is read.

    Plain assignment is used instead of ``del`` followed by assignment:
    the result is the same, and it also works when an attribute has not
    been set yet.
    """
    self._head = None
    self._title = None
    self._authors = None
    self._nav_map = None
    self._page_list = None
    self._nav_lists = None

309 

@override
def on_content_change(self):
    """
    Run the parent's content-change handling, then drop the cached
    wrappers through ``on_soup_change``.
    """
    super().on_content_change()
    self.on_soup_change()

313 self.on_soup_change()