Coverage for src/epublib/parse.py: 100%
45 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-18 16:07 -0300
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-18 16:07 -0300
1from zipfile import ZipInfo
3from epublib.exceptions import NotEPUBError
4from epublib.mediatype import MediaType
5from epublib.package.manifest import ManifestItem
6from epublib.package.resource import PackageDocument
7from epublib.resources import Resource, XMLResource
8from epublib.resources.create import create_resource
9from epublib.source import SourceProtocol
10from epublib.util import attr_to_str
def parse_container_file(source: SourceProtocol):
    """
    Load ``META-INF/container.xml`` from the source and find the package
    document it points to. Only the first ``rootfile`` element matched
    is taken into account.

    Return a tuple with the container file (as an XMLResource) and the
    string value of that rootfile's ``full-path`` attribute.

    Raise NotEPUBError if the container file is not in the source, if it
    holds no rootfile element, or if the rootfile's full-path is absent
    or resolves to a falsy value.
    """
    try:
        container_info = source.getinfo("META-INF/container.xml")
    except KeyError as missing:
        raise NotEPUBError("Missing 'META-INF/container.xml'") from missing

    container = XMLResource(source.open(container_info), container_info)

    first_rootfile = container.soup.select_one("rootfile")
    if not first_rootfile:
        raise NotEPUBError("Can't find rootfile in container.xml")

    # A missing attribute falls back to "" so it is rejected just below.
    raw_full_path = first_rootfile.attrs.get("full-path", "")
    full_path = attr_to_str(raw_full_path)
    if not full_path:
        raise NotEPUBError("rootfile in container.xml has no full-path")

    return container, full_path
def parse_package_document(source: SourceProtocol, filename: str):
    """
    Parse the package document stored under ``filename`` in the source.

    ``filename`` is the entry name to look up, e.g. the full-path value
    obtained from the rootfile in META-INF/container.xml. Return the
    entry wrapped in a PackageDocument.

    Whatever ``source.getinfo`` raises for an unknown entry is not
    caught here and propagates to the caller.
    """
    package_info = source.getinfo(filename)
    package_stream = source.open(package_info)
    return PackageDocument(package_stream, package_info)
def init_resource(
    source: SourceProtocol,
    info: ZipInfo,
    manifest_item: ManifestItem | None,
):
    """
    Initialize a Resource object from a ZipInfo and optional ManifestItem.

    If ``manifest_item`` is None the entry is wrapped in a plain
    Resource. Otherwise the resource is built by ``create_resource``
    from the item's coalesced media type and a flag telling whether the
    item has the "nav" property.

    The entry is opened exactly once per call. Opening it up front and
    then again for ``create_resource`` would leave the first stream
    unused and unclosed whenever a manifest item is present.
    """
    if manifest_item is None:
        return Resource(source.open(info), info)

    # Resolve the manifest metadata before opening the entry, so that a
    # failure here does not leave an open stream behind.
    media_type = MediaType.coalesce(manifest_item.media_type)
    is_nav = manifest_item.has_property("nav")

    return create_resource(
        source.open(info),
        info,
        media_type,
        is_nav=is_nav,
    )
def parse(
    source: SourceProtocol,
) -> tuple[XMLResource, PackageDocument, list[Resource]]:
    """
    Read and parse the EPUB file from the source.

    Return a tuple with the container file, the package document, and
    the list of resources for every non-directory entry of the source,
    in the order given by ``source.infolist()``. The container file and
    the package document appear in that list as the very same objects
    returned in the first two positions, not as re-parsed copies.
    """
    container_file, package_document_filename = parse_container_file(source)
    package_document = parse_package_document(source, package_document_filename)

    resources: list[Resource] = []
    for entry in source.infolist():
        if entry.is_dir():
            continue

        manifest_item = package_document.manifest.get(entry.filename)

        # Reuse the already parsed objects instead of creating a second
        # resource for the same entry.
        if entry.filename == package_document.filename:
            resource = package_document
        elif entry.filename == container_file.filename:
            resource = container_file
        else:
            resource = init_resource(source, entry, manifest_item)

        resources.append(resource)

    return container_file, package_document, resources