Coverage for src/epublib/ncx/resource.py: 91%

162 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-09-18 16:07 -0300

1from collections.abc import Iterable, Sequence 

2from pathlib import Path 

3from typing import IO, override 

4from zipfile import ZipInfo 

5 

6from epublib.exceptions import EPUBError 

7from epublib.mediatype import MediaType 

8from epublib.nav import NavItem 

9from epublib.nav.resource import NavigationDocument 

10from epublib.nav.util import PageBreakData, TOCEntryData 

11from epublib.ncx import ( 

12 NCXAuthor, 

13 NCXHead, 

14 NCXNavList, 

15 NCXNavMap, 

16 NCXNavPoint, 

17 NCXPageList, 

18 NCXTitle, 

19) 

20from epublib.package.metadata import BookMetadata 

21from epublib.resources import PublicationResource, XMLResource 

22from epublib.soup import NCXSoup 

23from epublib.util import get_absolute_href, get_relative_href 

24 

25 

class NCXFile(  # type: ignore[reportUnsafeMultipleInheritance]
    PublicationResource,
    XMLResource[NCXSoup],
):
    """
    The NCX document of the EPUB file, sometimes known as the 'toc.ncx' file.
    This is used in EPUB2 files for navigation, and was largely superseded by
    the navigation document in EPUB3. Support for it in EPUB3 is optional.

    The wrappers exposed as properties (head, title, authors, navMap,
    pageList and navLists) are built from the soup on first access and
    cached; `on_soup_change` drops the caches.
    """

    soup_class: type[NCXSoup] = NCXSoup

    def __init__(
        self,
        file: IO[bytes] | bytes,
        info: ZipInfo | str | Path,
        media_type: MediaType | str = MediaType.NCX,
    ) -> None:
        """
        Create the resource and start with every cached wrapper unset.
        """
        super().__init__(file, info, media_type)
        # Lazily built wrappers; None means "not built yet" (or, for the
        # page list, "the document has no pageList").
        self._head: NCXHead | None = None
        self._title: NCXTitle | None = None
        self._authors: Sequence[NCXAuthor] | None = None
        self._nav_map: NCXNavMap | None = None
        self._page_list: NCXPageList | None = None
        self._nav_lists: Sequence[NCXNavList] | None = None

    @property
    def head(self) -> NCXHead:
        """
        Wrapper around the `head` element, built on first access.
        """
        if self._head is None:
            self._head = NCXHead(self.soup.head, self.filename)
        return self._head

    @property
    def title(self) -> NCXTitle:
        """
        Wrapper around the `docTitle` element, built on first access.
        """
        if self._title is None:
            self._title = NCXTitle.from_tag(self.soup.docTitle)
        return self._title

    @property
    def authors(self) -> Sequence[NCXAuthor]:
        """
        One wrapper per `docAuthor` element, as a cached tuple.
        """
        if self._authors is None:
            self._authors = tuple(
                NCXAuthor.from_tag(tag) for tag in self.soup.select("docAuthor")
            )

        return self._authors

    @property
    def nav_map(self) -> NCXNavMap:
        """
        Wrapper around the `navMap` element, built on first access.
        """
        if self._nav_map is None:
            self._nav_map = NCXNavMap(self.soup, self.soup.navMap, self.filename)
        return self._nav_map

    @property
    def page_list(self) -> NCXPageList | None:
        """
        Wrapper around the `pageList` element, or None when the document
        has none. The lookup is repeated on each access until one is found.
        """
        if self._page_list is None:
            tag = self.soup.select_one("pageList")
            if tag:
                self._page_list = NCXPageList(self.soup, tag, self.filename)
        return self._page_list

    @property
    def nav_lists(self) -> Sequence[NCXNavList]:
        """
        One wrapper per `navList` element, as a cached tuple.
        """
        if self._nav_lists is None:
            self._nav_lists = tuple(
                NCXNavList(self.soup, tag, self.filename)
                for tag in self.soup.select("navList")
            )

        return self._nav_lists

    def add_to_nav_map(
        self,
        filename: str,
        title: str,
        position: int | None = None,
    ):
        """
        Add an item to the navMap for the given file.

        The href stored in the item is `filename` made relative to this
        NCX file. Returns whatever `nav_map.add_item` returns.
        """
        href = get_relative_href(self.filename, filename)
        return self.nav_map.add_item(href=href, text=title, position=position)

    def remove(self, filename: str) -> None:
        """
        Forward the removal of `filename` to the navMap, the pageList (if
        there is one) and every navList.
        """
        # Todo: remove references to images and audio as well
        self.nav_map.remove(filename)
        if self.page_list:
            self.page_list.remove(filename)
        for nav_list in self.nav_lists:
            nav_list.remove(filename)

    def add_author(self, name: str) -> NCXAuthor:
        """
        Create an author with the given name, insert it in the soup and
        return it.
        """
        author = NCXAuthor(name=name)
        author.insert_self_in_soup(self.soup)
        # The soup now holds an author the cached tuple does not know about;
        # drop the cache so the next `authors` access re-reads the soup.
        self._authors = None

        return author

    def add_nav_list(self, items: Iterable[TOCEntryData]) -> NCXNavList:
        """
        Create a new navList and fill it with one item per entry.

        Each item's href is the entry's filename made relative to this NCX
        file, followed by `#<id>` when the entry has an id.
        """
        nav_list = NCXNavList(
            self.soup,
            tag=None,
            base_filename=self.filename,
        )

        for entry in items:
            href = get_relative_href(self.filename, entry.filename) + (
                f"#{entry.id}" if entry.id is not None else ""
            )
            __ = nav_list.add_item(href=href, text=entry.label)

        # NOTE(review): presumably NCXNavList(tag=None) attaches a new navList
        # to the soup -- confirm. Dropping the cache is harmless either way and
        # keeps `nav_lists` from returning a stale tuple.
        self._nav_lists = None

        return nav_list

    def reset_nav_map(self, entries: list[TOCEntryData]) -> None:
        """
        Reset the navMap with the given entries.
        """
        self.nav_map.reset(entries)

    def reset_page_list(self, entries: list[PageBreakData]) -> None:
        """
        Reset the pageList with the given entries, creating a pageList
        first when the document has none.
        """
        page_list = self.page_list
        if not page_list:
            page_list = NCXPageList(
                self.soup,
                tag=None,
                base_filename=self.filename,
            )
            self._page_list = page_list

        page_list.reset(entries)

    def update_total_page_count(self) -> None:
        """
        Set the head's total page count to the number of pageList items.

        Raises:
            EPUBError: if there is no page list.
        """
        page_list = self.page_list
        if not page_list:
            raise EPUBError("No page list to update total page count from")

        self.head.total_page_count = len(page_list.items)

    def update_depth(self) -> None:
        """
        Set the head's depth to the navMap's maximum depth.
        """
        self.head.depth = self.nav_map.max_depth

    def update_max_page_number(self) -> None:
        """
        Set the head's max page number to the pageList's largest page number.

        Raises:
            EPUBError: if there is no page list.
        """
        page_list = self.page_list
        if not page_list:
            raise EPUBError("No page list to update max page number from")

        self.head.max_page_number = page_list.largest_page_number

    def _update_play_order_recursive(
        self,
        nav_point: NCXNavPoint,
        start: int,
    ) -> int:
        """
        Number every descendant of `nav_point` depth-first, in the order of
        each node's `items`, using consecutive integers.

        Args:
            nav_point: node whose items (and their descendants) are numbered.
            start: play order to assign to the first item.

        Returns:
            The next unused play order.
        """
        for item in nav_point.items:
            item.play_order = start
            start = self._update_play_order_recursive(item, start + 1)

        # `start` is already the next free value; adding one here would
        # leave a gap after every nav point.
        return start

    def update_play_order(self) -> None:
        """
        Renumber the play order of all navMap items, starting at 1.
        """
        __ = self._update_play_order_recursive(self.nav_map, 1)

    def update_numbers(self) -> None:
        """
        Update required numbers in the head and nav map of the NCX file:
        - max depth;
        - max page number (if there is a page list);
        - total page count (if there is a page list);
        - play order.
        """

        self.update_depth()
        self.update_play_order()

        if self.page_list:
            self.update_max_page_number()
            self.update_total_page_count()

    def sync_head(self, metadata: BookMetadata) -> NCXHead:
        """
        Sync metadata from the package document metadata to the NCX
        document, erasing any existing head > meta items. Should be used
        after populating the navMap and pageList (if there is one), to
        get an accurate page and depth count.

        Returns:
            The new head, which replaces the previous one in the soup.
        """
        head = NCXHead(self.soup.new_tag("head"), self.filename)

        if metadata.identifier:
            head.uid = metadata.identifier

        # Start the new head from neutral values; update_numbers() below
        # fills in the real ones once the new head is in place.
        head.depth = 0
        head.total_page_count = None
        head.max_page_number = None

        __ = self.soup.head.replace_with(head.tag)
        self._head = head
        self.update_numbers()

        return head

    def sync_toc(self, nav: NavigationDocument) -> NCXNavMap:
        """
        Sync the NCX navMap to match the given TOC structure, erasing
        any existing navMap items. Should be used after the navigation
        document's TOC has been populated.

        Returns:
            The new navMap, which replaces the previous one in the soup.

        Raises:
            EPUBError: if the navigation document has no TOC, or if the
                TOC is so large that it looks like a cycle.
        """
        if not nav.toc:
            raise EPUBError("No TOC in navigation document to sync from")

        original_filename = nav.filename

        nav_map = NCXNavMap(self.soup, tag=None, base_filename=self.filename)

        # Budget of recursive calls; running out is treated as a cycle.
        count = 1 << 16

        def recurse_items(
            nav_point: NCXNavPoint,
            toc_item: NavItem,
        ) -> None:
            nonlocal count
            count -= 1

            if count <= 0:
                raise EPUBError("Infinite recursion detected in TOC structure")

            for sub_toc_item in toc_item.items:
                # hrefs in the TOC are relative to the navigation document;
                # re-express them relative to this NCX file.
                absolute_filename = get_absolute_href(
                    original_filename,
                    sub_toc_item.href,
                )
                relative_filename = get_relative_href(
                    self.filename,
                    absolute_filename,
                )
                sub_nav_point = nav_point.add_item(sub_toc_item.text, relative_filename)
                recurse_items(sub_nav_point, sub_toc_item)

        recurse_items(nav_map, nav.toc)

        __ = self.soup.navMap.replace_with(nav_map.tag)
        self._nav_map = nav_map

        return nav_map

    def sync_page_list(self, nav: NavigationDocument) -> None:
        """
        Rebuild the NCX pageList from the navigation document's page list,
        creating the pageList if needed and discarding its previous items.

        Raises:
            EPUBError: if the navigation document has no page list.
        """
        if not nav.page_list:
            raise EPUBError("No page list in navigation document to sync from")
        original_filename = nav.filename

        self.reset_page_list([])
        page_list = self.page_list
        # For type narrowing only: reset_page_list() creates a page list
        # when there was none.
        assert page_list

        for item in nav.page_list.items:
            # hrefs are relative to the navigation document; re-express
            # them relative to this NCX file.
            absolute_filename = get_absolute_href(
                original_filename,
                item.href,
            )
            relative_filename = get_relative_href(
                self.filename,
                absolute_filename,
            )

            __ = page_list.add_item(item.text, relative_filename)

    def on_soup_change(self) -> None:
        """
        Drop every cached wrapper so it is rebuilt from the soup on next
        access.
        """
        # Plain assignment (no `del` first) so this also works if it runs
        # before __init__ has created the attributes.
        self._head = None
        self._title = None
        self._authors = None
        self._nav_map = None
        self._page_list = None
        self._nav_lists = None

    @override
    def on_content_change(self) -> None:
        """
        Run the parent handler, then drop the cached wrappers.
        """
        super().on_content_change()
        self.on_soup_change()