Coverage for src/epublib/__init__.py: 95%
168 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-18 16:07 -0300
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-18 16:07 -0300
1from collections.abc import Generator
2from pathlib import Path
3from typing import IO, Any, cast, override
4from zipfile import ZipFile, is_zipfile
6from epublib.create import EPUBCreator
7from epublib.exceptions import ClosedEPUBError, EPUBError, NotEPUBError
8from epublib.identifier import EPUBId
9from epublib.mediatype import Category, MediaType
10from epublib.nav.reset import (
11 create_landmarks,
12 create_page_list,
13 create_toc,
14 reset_landmarks,
15 reset_page_list,
16 reset_toc,
17)
18from epublib.nav.resource import NavigationDocument
19from epublib.ncx.reset import generate_ncx, reset_ncx
20from epublib.ncx.resource import NCXFile
21from epublib.package.manifest import (
22 BookManifest,
23 ManifestItem,
24 detect_manifest_properties,
25)
26from epublib.package.metadata import BookMetadata, ValuedMetadataItem
27from epublib.package.resource import PackageDocument
28from epublib.package.spine import BookSpine, SpineItemRef
29from epublib.parse import parse
30from epublib.resources import (
31 ContentDocument,
32 PublicationResource,
33 Resource,
34 XMLResource,
35)
36from epublib.resources.manager import ResourceIdentifier, ResourceManager
37from epublib.source import (
38 DirectorySink,
39 DirectorySource,
40 SinkProtocol,
41 SourceProtocol,
42)
43from epublib.util import get_epublib_version
class EPUB:
    """
    The main class for reading, writing, and manipulating EPUB files.

    An instance holds the *source* it was opened from (a ZIP archive or
    a directory), the container file and package document returned by
    ``parse``, and a ``ResourceManager`` built from the parsed
    resources. It can be used as a context manager: leaving the ``with``
    block calls ``close()``.
    """

    def __init__(
        self,
        file: IO[bytes] | str | Path | None = None,
        generator_tag: bool = True,
    ) -> None:
        """
        Open an existing EPUB, or create a new one when ``file`` is None.

        Args:
            file: A ZIP archive (path or binary file object), a path to
                a directory, or ``None`` to start from the file produced
                by ``EPUBCreator``.
            generator_tag: When true, ``add_generator_tag()`` is called
                once parsing is done. The flag is also forwarded to
                ``EPUBCreator`` when a new EPUB is created.

        Raises:
            NotEPUBError: If ``file`` is neither a ZIP archive nor a
                directory.
        """
        self.source: SourceProtocol

        if file is None:
            self.source = ZipFile(
                EPUBCreator(add_generator_tag=generator_tag).to_file()
            )
        elif is_zipfile(file):
            self.source = ZipFile(file)
        elif isinstance(file, (str, Path)) and Path(file).is_dir():
            self.source = DirectorySource(file)
        else:
            raise NotEPUBError(f"file '{file}' is not ZIP nor folder")

        self._closed: bool = False
        self.container_file: XMLResource
        self.package_document: PackageDocument
        try:
            self.container_file, self.package_document, resources = parse(
                self.source
            )
            self.resources: ResourceManager = ResourceManager(
                resources,
                container_file=self.container_file,
                package_document=self.package_document,
                # Looked up lazily: nav/ncx are resolved through
                # ``self.resources``, which does not exist yet here.
                nav_getter=lambda: self.nav,
                ncx_getter=lambda: self.ncx,
            )
        except BaseException:
            # Do not leave the freshly opened source dangling when the
            # input turns out not to be a parseable EPUB.
            self._close_source()
            raise

        self.original_path: Path | None = (
            Path(file) if isinstance(file, (str, Path)) else None
        )

        if generator_tag:
            self.add_generator_tag()

    def _close_source(self) -> None:
        """Close ``self.source`` if it exposes a ``close()`` method."""
        # Not every source is known to be closable from here (ZipFile
        # is), so look the method up defensively.
        closer = getattr(self.source, "close", None)
        if callable(closer):
            closer()

    def _manifest_item_for(self, resource: Resource | ResourceIdentifier):
        """
        Return ``resource`` itself when it already is a ``ManifestItem``,
        otherwise whatever ``self.manifest.get(resource)`` yields.
        """
        if isinstance(resource, ManifestItem):
            return resource
        return self.manifest.get(resource)

    def close(self) -> None:
        """
        Close every resource and the underlying source.

        Calling ``close()`` on an already closed EPUB is a no-op.
        """
        if self._closed:
            return

        try:
            for resource in self.resources:
                resource.close()
        finally:
            # Release the source even if closing a resource failed.
            self._close_source()
        self._closed = True

    def is_closed(self) -> bool:
        """Return whether ``close()`` has been called on this EPUB."""
        return self._closed

    def _check_closed(self, msg: str = "EPUB is already closed") -> None:
        """Raise ``ClosedEPUBError(msg)`` if this EPUB has been closed."""
        if self._closed:
            raise ClosedEPUBError(msg)

    def __enter__(self):
        """Enter the context manager; returns this instance."""
        return self

    def __exit__(self, *args: Any):  # type: ignore[Any]
        """Leave the context manager, closing the EPUB."""
        self.close()

    def add_generator_tag(self) -> None:
        """Add a generator meta tag to the metadata."""
        if not self.metadata.get("generator"):
            __ = self.metadata.add("generator", "Edited with epublib")

        version = get_epublib_version()
        version_item = self.metadata.get("epublib version")
        # Only record the version when it is known and not yet present.
        if not version_item and version:
            __ = self.metadata.add("epublib version", version)

    def remove_generator_tag(self) -> None:
        """Remove the epublib generator tag of the metadata, if any."""
        generator = self.metadata.get("generator")
        # Leave generator tags written by other tools untouched.
        if (
            generator
            and isinstance(generator, ValuedMetadataItem)
            and "epublib" in generator.value
        ):
            self.metadata.remove_item(generator)

        version_item = self.metadata.get("epublib version")
        if version_item:
            self.metadata.remove_item(version_item)

    def write_to_sink(self, out: SinkProtocol) -> None:
        """
        Write this epub to a sink.

        Each resource is written with ``out.writestr`` and then freed.

        Raises:
            ClosedEPUBError: If this EPUB has already been closed.
        """
        self._check_closed("trying to write closed EPUB")

        for resource in self.resources:
            out.writestr(resource.zipinfo, resource.content)
            resource.free()

    def write(self, output_file: IO[bytes] | str | Path) -> None:
        """
        Write this epub to a zip file.

        Raises:
            ClosedEPUBError: If this EPUB has already been closed.
        """
        # Check before opening so a closed EPUB never truncates or
        # creates the output file.
        self._check_closed("trying to write closed EPUB")

        # The archive must be closed for its central directory to be
        # written; the ``with`` block guarantees that.
        with ZipFile(output_file, mode="w") as out_zip:
            self.write_to_sink(out_zip)

    def write_to_folder(self, folder: str | Path) -> None:
        """
        Write this epub to a folder ('unzipped').

        Raises:
            EPUBError: If ``folder`` is not an existing directory.
            ClosedEPUBError: If this EPUB has already been closed.
        """
        if not Path(folder).is_dir():
            raise EPUBError(f"Path '{folder}' is not a directory")

        self.write_to_sink(DirectorySink(folder))

    def documents(self) -> Generator[ContentDocument]:
        """
        Retrieve all content documents (XHTML or SVG) from this EPUB
        """
        yield from self.resources.filter(ContentDocument)

    def images(self) -> Generator[PublicationResource]:
        """
        Retrieve all image resources from this EPUB
        """
        yield from self.resources.filter(Category.IMAGE)

    def scripts(self) -> Generator[PublicationResource]:
        """
        Retrieve all JavaScript resources from this EPUB: the resources
        in ``Category.OTHER`` whose media type reports ``is_js()``.
        """
        return (
            resource
            for resource in self.resources.filter(Category.OTHER)
            if cast(MediaType, resource.media_type).is_js()
        )

    def styles(self) -> Generator[PublicationResource]:
        """
        Retrieve all CSS resources from this EPUB: the resources in
        ``Category.STYLE`` whose media type reports ``is_css()``.
        """
        return (
            resource
            for resource in self.resources.filter(Category.STYLE)
            if cast(MediaType, resource.media_type).is_css()
        )

    def get_spine_item(
        self,
        resource: Resource | ResourceIdentifier,
    ) -> SpineItemRef | None:
        """
        Get spine item associated with a resource or filename.

        Returns ``None`` when the resource has no manifest item.
        """
        # An id can be looked up in the spine directly.
        if isinstance(resource, EPUBId):
            return self.spine.get(resource)

        manifest_item = self._manifest_item_for(resource)
        if not manifest_item:
            return None

        return self.spine.get(manifest_item.id)

    def rename_id(
        self,
        old: Resource | ResourceIdentifier,
        new: EPUBId,
    ) -> None:
        """
        Rename a manifest identifier. Look for references for updating
        it in the spine items, the cover-image metadata tag, and the toc
        attribute of the spine element. Using this function is not
        recommended, as there may be other references to the old id that
        will become outdated.

        Raises:
            EPUBError: If ``old`` is not in the manifest, or if ``new``
                is already used by another manifest item.
        """
        manifest_item = self._manifest_item_for(old)
        if not manifest_item:
            raise EPUBError(f"Can't rename '{old}': not in manifest")

        old_id = manifest_item.id

        existing = self.manifest.get(new)
        if existing:
            raise EPUBError(f"Can't rename to already existing id '{new}' ({existing})")

        # cover-image in metadata: only touch it when it actually
        # references the id being renamed.
        cover = self.metadata.get("cover-image")
        if cover and getattr(cover, "value", None) == old_id:
            cover.value = new

        # spine tag: the toc attribute may be absent, so do not index it
        # directly.
        if self.spine.tag.attrs.get("toc") == old_id:
            self.spine.tag.attrs["toc"] = new

        spine_item = self.spine.get(old_id)
        if spine_item:
            spine_item.idref = new

        manifest_item.id = new

    def get_spine_position(
        self,
        resource: Resource | ResourceIdentifier,
    ) -> int | None:
        """
        Get the 0-indexed position of a resource in the spine.

        Returns ``None`` when the resource has no manifest item.
        """
        if isinstance(resource, EPUBId):
            epub_id = resource
        else:
            manifest_item = self._manifest_item_for(resource)
            if not manifest_item:
                return None
            epub_id = manifest_item.id

        return self.spine.get_position(epub_id)

    def update_manifest_properties(self) -> None:
        """
        Update manifest properties by detecting them from the resources
        See https://www.w3.org/TR/epub-33/#sec-item-resource-properties

        Existing properties are kept; detected ones are appended.
        Duplicates are dropped while preserving first-seen order, so the
        result is deterministic.
        """
        for item in self.manifest.items:
            resource = self.resources.get(item.name, XMLResource)
            if not resource:
                continue

            current = item.properties if item.properties is not None else []
            detected = detect_manifest_properties(resource.soup)
            # dict.fromkeys de-duplicates like a set but keeps order.
            item.properties = list(dict.fromkeys([*current, *detected]))

    def reset_toc(
        self,
        targets_selector: str | None = None,
        include_filenames: bool = False,
        spine_only: bool = False,  # ensures correct ordering
        reset_ncx: bool | None = None,
        resource_class: type[Resource] = ContentDocument,
    ):
        """
        Reset the table of contents in the navigation document by
        detecting targets in content documents. May replace any
        existing TOC.

        All arguments are forwarded unchanged to
        ``epublib.nav.reset.reset_toc``, whose result is returned.
        """
        return reset_toc(
            self,
            targets_selector,
            include_filenames,
            spine_only,
            reset_ncx,
            resource_class,
        )

    def create_toc(
        self,
        targets_selector: str | None = None,
        include_filenames: bool = False,
        spine_only: bool = False,  # ensures correct ordering
        reset_ncx: bool | None = None,
        resource_class: type[Resource] = ContentDocument,
    ):
        """
        Create a new table of contents in the navigation document by
        detecting targets in content documents. Will raise an error if
        a TOC already exists.

        All arguments are forwarded unchanged to
        ``epublib.nav.reset.create_toc``, whose result is returned.
        """
        return create_toc(
            self,
            targets_selector,
            include_filenames,
            spine_only,
            reset_ncx,
            resource_class,
        )

    def reset_page_list(
        self,
        id_format: str = "page_{page}",
        label_format: str = "{page}",
        pagebreak_selector: str = '[role="doc-pagebreak"], [epub|type="pagebreak"]',
        reset_ncx: bool | None = None,
    ):
        """
        Reset the page list in the navigation document by detecting
        pagebreaks in content documents. Will replace any existing page
        list.

        All arguments are forwarded unchanged to
        ``epublib.nav.reset.reset_page_list``, whose result is returned.
        """
        return reset_page_list(
            self,
            id_format,
            label_format,
            pagebreak_selector,
            reset_ncx,
        )

    def create_page_list(
        self,
        id_format: str = "page_{page}",
        label_format: str = "{page}",
        pagebreak_selector: str = '[role="doc-pagebreak"], [epub|type="pagebreak"]',
        reset_ncx: bool | None = None,
    ):
        """
        Create new page list in the navigation document by detecting
        pagebreaks in content documents. Will raise an error if a page
        list already exists.

        All arguments are forwarded unchanged to
        ``epublib.nav.reset.create_page_list``, whose result is returned.
        """
        return create_page_list(
            self,
            id_format,
            label_format,
            pagebreak_selector,
            reset_ncx,
        )

    def reset_landmarks(
        self,
        include_toc: bool = True,
        targets_selector: str | None = None,
    ):
        """
        Reset the landmarks in the navigation document by detecting
        targets in content documents, and optionally including the TOC.
        Will replace existing landmarks.
        """
        return reset_landmarks(self, include_toc, targets_selector)

    def create_landmarks(
        self,
        include_toc: bool = True,
        targets_selector: str | None = None,
    ):
        """
        Create landmarks in the navigation document by detecting
        targets in content documents, and optionally including the TOC.
        Will raise error if landmarks already exist.
        """
        return create_landmarks(self, include_toc, targets_selector)

    def generate_ncx(self, filename: str | Path | None = None) -> NCXFile:
        """
        Delegate to ``epublib.ncx.reset.generate_ncx`` for this EPUB,
        forwarding ``filename``, and return the resulting ``NCXFile``.
        """
        return generate_ncx(self, filename)

    def reset_ncx(self, ncx: NCXFile | None = None) -> NCXFile:
        """
        Delegate to ``epublib.ncx.reset.reset_ncx`` for this EPUB,
        forwarding ``ncx``, and return the resulting ``NCXFile``.
        """
        return reset_ncx(self, ncx)

    @property
    def base_dir(self) -> Path:
        """
        The base directory for the resources in this EPUB. This is a
        convenience notion that the spec does not define. There may be
        more than one base directory in an EPUB. This is the one
        containing the package document.
        """
        return Path(self.package_document.filename).parent

    @property
    def manifest(self) -> BookManifest:
        """The manifest of the package document."""
        return self.package_document.manifest

    @property
    def metadata(self) -> BookMetadata:
        """The metadata of the package document."""
        return self.package_document.metadata

    @property
    def spine(self) -> BookSpine:
        """The spine of the package document."""
        return self.package_document.spine

    @property
    def nav(self):
        """
        The navigation document resource referenced by the manifest, or
        ``None`` when the manifest has no nav item.
        """
        return (
            self.resources.get(self.manifest.nav.filename, NavigationDocument)
            if self.manifest.nav
            else None
        )

    @property
    def ncx(self) -> NCXFile | None:
        """The first ``NCXFile`` among the resources, or ``None``."""
        return next(self.resources.filter(NCXFile), None)

    @override
    def __repr__(self) -> str:
        """Show the class name and the title (or ``id(self)`` if unset)."""
        return f"{self.__class__.__name__}(title='{self.metadata.title or id(self)}')"