Coverage for src/epublib/resources/__init__.py: 98%
137 statements
« prev ^ index » next coverage.py v7.10.7, created at 2025-10-07 12:30 -0300
« prev ^ index » next coverage.py v7.10.7, created at 2025-10-07 12:30 -0300
1import io
2from pathlib import Path
3from typing import IO, override
4from zipfile import ZipInfo
6import bs4
8from epublib.exceptions import ClosedEPUBError, EPUBError
9from epublib.media_type import Category, MediaType
10from epublib.source import zip_info_now
11from epublib.util import strip_fragment
14def info_to_zipinfo(info: ZipInfo | str | Path) -> ZipInfo:
15 if isinstance(info, ZipInfo):
16 return info
18 return ZipInfo(filename=str(strip_fragment(info)), date_time=zip_info_now())
21class Resource:
22 """Base class for all resources (i.e. files) in an EPUB file."""
24 def __init__(self, file: IO[bytes] | bytes, info: ZipInfo | str | Path) -> None:
25 self.zipinfo: ZipInfo = info_to_zipinfo(info)
26 self._file: IO[bytes] = io.BytesIO(file) if isinstance(file, bytes) else file
27 self._content: bytes | None = None
29 @classmethod
30 def from_path(cls, filename: str | Path, location: str | Path):
31 file = open(filename, "rb")
32 zipinfo = ZipInfo.from_file(filename, location, strict_timestamps=False)
33 return cls(file, zipinfo)
35 @override
36 def __repr__(self) -> str:
37 return f"{self.__class__.__name__}({self.filename})"
39 def on_content_change(self):
40 pass
42 @property
43 def filename(self):
44 return self.zipinfo.filename
46 @filename.setter
47 def filename(self, value: str):
48 self._set_filename(value)
50 def _set_filename(self, value: str):
51 self.zipinfo.filename = strip_fragment(value)
53 def get_content(self, cache: bool = True) -> bytes:
54 """
55 Get the content of this resource. If this content hasn't been
56 cached yet and `cache` is False, the content will be read
57 directly from the underlying file without storing it in memory.
58 """
60 self.check_closed()
61 content = self._content
62 if content is None:
63 content = self._file.read()
64 __ = self._file.seek(0)
65 if cache:
66 self._content = content
68 return content
70 @property
71 def content(self) -> bytes:
72 return self.get_content()
74 @content.setter
75 def content(self, value: bytes):
76 self.check_closed()
77 self._set_content(value)
79 def _set_content(self, value: bytes, content_change: bool = True):
80 self._content = value
81 if content_change:
82 self.on_content_change()
84 def get_title(self):
85 return self.filename
87 @property
88 def closed(self):
89 return self._file.closed
91 def check_closed(self):
92 if self.closed:
93 raise ClosedEPUBError(f"Using resource {self.filename} after closing")
95 def close(self):
96 del self._content
97 self._content = None
98 self._file.close()
101class XMLResource[S: bs4.BeautifulSoup = bs4.BeautifulSoup](Resource):
102 """A resource that is an XML file."""
104 soup_class: type[S] = bs4.BeautifulSoup # type: ignore[reportAssignmentType]
106 def __init__(self, file: IO[bytes] | bytes, info: ZipInfo | str | Path) -> None:
107 super().__init__(file, info)
108 self._soup: None | S = None
110 @property
111 def soup(self) -> S:
112 if self._soup is None:
113 self._soup = self.soup_class(self.content, "xml")
114 return self._soup
116 @soup.setter
117 def soup(self, value: S):
118 self._set_soup(value)
120 def _set_soup(self, value: S):
121 self._soup = value
123 @override
124 def get_content(self, cache: bool = True) -> bytes:
125 if self._soup is not None:
126 self._set_content(self._soup.encode(), content_change=False)
127 return super().get_content()
129 @override
130 def on_content_change(self):
131 super().on_content_change()
132 del self._soup
133 self._soup = None
135 @override
136 def get_title(self):
137 if self.soup.title and self.soup.title.string:
138 return self.soup.title.string
139 return super().get_title()
class PublicationResource(Resource):
    """
    A resource that contributes to the logic and rendering of the publication.

    This includes resources like the package document, content documents (XHTML),
    CSS stylesheets, audio, video, images, fonts, and scripts.

    This class provides the `media_type` attribute.
    """

    def __init__(
        self,
        file: IO[bytes] | bytes,
        info: ZipInfo | str | Path,
        media_type: MediaType | str | None = None,
    ) -> None:
        """
        Create the resource. When `media_type` is None it is derived from
        the filename with `MediaType.from_filename`.

        Raises `EPUBError` if no media type was given and none can be
        determined from the filename.
        """
        super().__init__(file, info)
        if media_type is None:
            media_type = MediaType.from_filename(self.zipinfo.filename)
            if media_type is None:
                raise EPUBError(
                    f"Cannot determine media type of {self.zipinfo.filename}"
                )

        self.media_type: MediaType = MediaType(media_type)

    @classmethod
    @override
    def from_path(
        cls,
        filename: str | Path,
        location: str | Path,
        media_type: MediaType | str | None = None,
    ):
        """
        Create a resource from the file at `filename` on disk, stored in
        the archive as `location`.

        An explicit `media_type` is handed to the constructor, so it is
        used even when no media type can be derived from the filename.
        Without one, the base implementation is used and the media type
        is derived from the filename.
        """
        if media_type is None:
            return super().from_path(filename, location)

        file = open(filename, "rb")
        try:
            zipinfo = ZipInfo.from_file(filename, location, strict_timestamps=False)
            # Pass the media type at construction time (as `from_resource`
            # does) so the filename-based lookup cannot reject the file.
            return cls(file, zipinfo, media_type)
        except BaseException:
            # Nobody else owns the handle yet, so release it here.
            file.close()
            raise

    @property
    def is_foreign(self):
        """Whether the media type's category is `Category.FOREIGN`."""
        return self.media_type.category is Category.FOREIGN

    @property
    def category(self):
        """The category of this resource's media type."""
        return self.media_type.category

    @classmethod
    def from_resource(cls, other: Resource, media_type: MediaType | str | None = None):
        """
        Create an instance of this class from another resource.

        The new instance reuses `other`'s underlying file object and
        `ZipInfo` (both are shared, not copied).

        Raises `ClosedEPUBError` if `other` is already closed.
        """
        if other.closed:
            raise ClosedEPUBError(f"Using resource {other} after closing")

        # NOTE(review): only the file object is carried over; content
        # cached or assigned on `other` is not copied — confirm intended.
        return cls(other._file, other.zipinfo, media_type)
199class ContentDocument[S: bs4.BeautifulSoup = bs4.BeautifulSoup]( # type: ignore[reportUnsafeMultipleInheritance]
200 PublicationResource,
201 XMLResource[S],
202):
203 """
204 A publication resource referenced from the spine or a manifest fallback
205 chain that conforms to either the XHTML or SVG content document definitions.
206 """
208 @override
209 def get_title(self):
210 if self.soup.h1 and self.soup.h1.string:
211 return self.soup.h1.string
213 if self.soup.title and self.soup.title.string:
214 return self.soup.title.string
216 if self.soup.body:
217 string = self.soup.body.find(string=True)
218 else:
219 string = self.soup.find(string=True)
221 if string:
222 return string
224 return ""