Coverage for src/epublib/resources/__init__.py: 86%
133 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-18 16:07 -0300
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-18 16:07 -0300
1import io
2from mimetypes import guess_file_type
3from pathlib import Path
4from typing import IO, override
5from zipfile import ZipInfo
7import bs4
9from epublib.exceptions import EPUBError
10from epublib.mediatype import Category, MediaType
11from epublib.source import zip_info_now
12from epublib.util import get_absolute_href
15def info_to_zipinfo(info: ZipInfo | str | Path) -> ZipInfo:
16 if isinstance(info, ZipInfo):
17 return info
19 return ZipInfo(filename=str(info), date_time=zip_info_now())
22class Resource:
23 """Base class for all resources (i.e. files) in an EPUB file."""
25 def __init__(self, file: IO[bytes] | bytes, info: ZipInfo | str | Path) -> None:
26 self.zipinfo: ZipInfo = info_to_zipinfo(info)
27 self._file: IO[bytes] | None = (
28 io.BytesIO(file) if isinstance(file, bytes) else file
29 )
30 self._content: bytes | None = None
31 self._closed: bool = False
33 @classmethod
34 def from_path(cls, filename: str | Path, location: str | Path):
35 file = open(filename, "rb")
36 zipinfo = ZipInfo.from_file(filename, location, strict_timestamps=False)
37 return cls(file, zipinfo)
39 @override
40 def __repr__(self) -> str:
41 return f"{self.__class__.__name__}({self.filename})"
43 def on_content_change(self):
44 pass
46 @property
47 def filename(self):
48 return self.zipinfo.filename
50 @filename.setter
51 def filename(self, value: str):
52 self._set_filename(value)
54 def _set_filename(self, value: str):
55 self.zipinfo.filename = value
57 @property
58 def content(self) -> bytes:
59 self.check_closed()
60 if self._content is None:
61 if self._file is None:
62 return b""
63 self._content = self._file.read()
64 __ = self._file.seek(0)
65 return self._content
67 @content.setter
68 def content(self, value: bytes):
69 self.check_closed()
70 self._set_content(value)
72 def _set_content(self, value: bytes, content_change: bool = True):
73 self._content = value
74 if content_change:
75 self.on_content_change()
77 def free(self):
78 del self._content
79 self._content = None
80 self.on_content_change()
82 def get_title(self):
83 return self.filename
85 def check_closed(self):
86 if self._closed:
87 raise EPUBError(f"Using resource {self.filename} after closing")
89 def close(self):
90 self.free()
91 if self._file is not None:
92 self._file.close()
93 self._file = None
95 def href_to_filename[T: (str, Path)](self, href: T) -> T:
96 return get_absolute_href(self.filename, href)
99class XMLResource[S: bs4.BeautifulSoup = bs4.BeautifulSoup](Resource):
100 """A resource that is an XML file."""
102 soup_class: type[S] = bs4.BeautifulSoup # type: ignore[reportAssignmentType]
104 def __init__(self, file: IO[bytes] | bytes, info: ZipInfo | str | Path) -> None:
105 super().__init__(file, info)
106 self._soup: None | S = None
108 @property
109 def soup(self) -> S:
110 if self._soup is None:
111 self._soup = self.soup_class(self.content, "xml")
112 return self._soup
114 @soup.setter
115 def soup(self, value: S):
116 self._set_soup(value)
118 def _set_soup(self, value: S):
119 self._soup = value
121 @property
122 @override
123 def content(self):
124 if self._soup is not None:
125 self._set_content(self._soup.encode(), content_change=False)
126 return super().content
128 @content.setter
129 def content(self, value: bytes):
130 super()._set_content(value)
132 @override
133 def on_content_change(self):
134 super().on_content_change()
135 del self._soup
136 self._soup = None
138 @override
139 def get_title(self):
140 if self.soup.title and self.soup.title.string:
141 return self.soup.title.string
142 return super().get_title()
class PublicationResource(Resource):
    """
    A resource that contributes to the logic and rendering of the publication.

    This includes resources like the package document, content documents (XHTML),
    CSS stylesheets, audio, video, images, fonts, and scripts.
    """

    def __init__(
        self,
        file: IO[bytes] | bytes,
        info: ZipInfo | str | Path,
        media_type: MediaType | str | None = None,
    ) -> None:
        """Create the resource and settle its media type.

        When *media_type* is omitted it is guessed from the entry's filename.
        The result is passed through ``MediaType.coalesce`` before being stored.

        Raises:
            EPUBError: If no media type is given and none can be guessed.
        """
        super().__init__(file, info)
        entry_name = self.zipinfo.filename
        resolved = media_type
        if resolved is None:
            resolved, _encoding = guess_file_type(entry_name)
        if resolved is None:
            raise EPUBError(f"Cannot determine media type of {entry_name}")

        self.media_type: MediaType | str = MediaType.coalesce(resolved)

    @property
    def is_foreign(self):
        """True when the stored media type is a plain string."""
        return isinstance(self.media_type, str)

    @property
    def category(self):
        """``Category.FOREIGN`` for string media types, else the media type's category."""
        kind = self.media_type
        return Category.FOREIGN if isinstance(kind, str) else kind.category

    @classmethod
    def from_resource(cls, other: Resource, media_type: str | MediaType | None = None):
        """Build an instance of ``cls`` over *other*'s stream and ``ZipInfo``.

        The stream object is reused, not copied.

        Raises:
            EPUBError: If *other* has no stream or is marked closed.
        """
        if other._closed or other._file is None:
            raise EPUBError(f"Using resource {other} after closing")

        return cls(other._file, other.zipinfo, media_type)
188class ContentDocument[S: bs4.BeautifulSoup = bs4.BeautifulSoup]( # type: ignore[reportUnsafeMultipleInheritance]
189 PublicationResource,
190 XMLResource[S],
191):
192 """
193 A publication resource referenced from the spine or a manifest fallback
194 chain that conforms to either the XHTML or SVG content document definitions.
195 """
197 @override
198 def get_title(self):
199 if self.soup.h1 and self.soup.h1.string:
200 return self.soup.h1.string
201 return super().get_title()