Coverage for src/epublib/resources/__init__.py: 98%

137 statements  

« prev     ^ index     » next       coverage.py v7.10.7, created at 2025-10-07 12:30 -0300

1import io 

2from pathlib import Path 

3from typing import IO, override 

4from zipfile import ZipInfo 

5 

6import bs4 

7 

8from epublib.exceptions import ClosedEPUBError, EPUBError 

9from epublib.media_type import Category, MediaType 

10from epublib.source import zip_info_now 

11from epublib.util import strip_fragment 

12 

13 

14def info_to_zipinfo(info: ZipInfo | str | Path) -> ZipInfo: 

15 if isinstance(info, ZipInfo): 

16 return info 

17 

18 return ZipInfo(filename=str(strip_fragment(info)), date_time=zip_info_now()) 

19 

20 

21class Resource: 

22 """Base class for all resources (i.e. files) in an EPUB file.""" 

23 

24 def __init__(self, file: IO[bytes] | bytes, info: ZipInfo | str | Path) -> None: 

25 self.zipinfo: ZipInfo = info_to_zipinfo(info) 

26 self._file: IO[bytes] = io.BytesIO(file) if isinstance(file, bytes) else file 

27 self._content: bytes | None = None 

28 

29 @classmethod 

30 def from_path(cls, filename: str | Path, location: str | Path): 

31 file = open(filename, "rb") 

32 zipinfo = ZipInfo.from_file(filename, location, strict_timestamps=False) 

33 return cls(file, zipinfo) 

34 

35 @override 

36 def __repr__(self) -> str: 

37 return f"{self.__class__.__name__}({self.filename})" 

38 

39 def on_content_change(self): 

40 pass 

41 

42 @property 

43 def filename(self): 

44 return self.zipinfo.filename 

45 

46 @filename.setter 

47 def filename(self, value: str): 

48 self._set_filename(value) 

49 

50 def _set_filename(self, value: str): 

51 self.zipinfo.filename = strip_fragment(value) 

52 

53 def get_content(self, cache: bool = True) -> bytes: 

54 """ 

55 Get the content of this resource. If this content hasn't been 

56 cached yet and `cache` is False, the content will be read 

57 directly from the underlying file without storing it in memory. 

58 """ 

59 

60 self.check_closed() 

61 content = self._content 

62 if content is None: 

63 content = self._file.read() 

64 __ = self._file.seek(0) 

65 if cache: 

66 self._content = content 

67 

68 return content 

69 

70 @property 

71 def content(self) -> bytes: 

72 return self.get_content() 

73 

74 @content.setter 

75 def content(self, value: bytes): 

76 self.check_closed() 

77 self._set_content(value) 

78 

79 def _set_content(self, value: bytes, content_change: bool = True): 

80 self._content = value 

81 if content_change: 

82 self.on_content_change() 

83 

84 def get_title(self): 

85 return self.filename 

86 

87 @property 

88 def closed(self): 

89 return self._file.closed 

90 

91 def check_closed(self): 

92 if self.closed: 

93 raise ClosedEPUBError(f"Using resource {self.filename} after closing") 

94 

95 def close(self): 

96 del self._content 

97 self._content = None 

98 self._file.close() 

99 

100 

101class XMLResource[S: bs4.BeautifulSoup = bs4.BeautifulSoup](Resource): 

102 """A resource that is an XML file.""" 

103 

104 soup_class: type[S] = bs4.BeautifulSoup # type: ignore[reportAssignmentType] 

105 

106 def __init__(self, file: IO[bytes] | bytes, info: ZipInfo | str | Path) -> None: 

107 super().__init__(file, info) 

108 self._soup: None | S = None 

109 

110 @property 

111 def soup(self) -> S: 

112 if self._soup is None: 

113 self._soup = self.soup_class(self.content, "xml") 

114 return self._soup 

115 

116 @soup.setter 

117 def soup(self, value: S): 

118 self._set_soup(value) 

119 

120 def _set_soup(self, value: S): 

121 self._soup = value 

122 

123 @override 

124 def get_content(self, cache: bool = True) -> bytes: 

125 if self._soup is not None: 

126 self._set_content(self._soup.encode(), content_change=False) 

127 return super().get_content() 

128 

129 @override 

130 def on_content_change(self): 

131 super().on_content_change() 

132 del self._soup 

133 self._soup = None 

134 

135 @override 

136 def get_title(self): 

137 if self.soup.title and self.soup.title.string: 

138 return self.soup.title.string 

139 return super().get_title() 

140 

141 

class PublicationResource(Resource):
    """
    A resource that contributes to the logic and rendering of the publication.

    This includes resources like the package document, content documents (XHTML),
    CSS stylesheets, audio, video, images, fonts, and scripts.

    This class provides the `media_type` attribute.
    """

    def __init__(
        self,
        file: IO[bytes] | bytes,
        info: ZipInfo | str | Path,
        media_type: MediaType | str | None = None,
    ) -> None:
        """
        Create the resource. When `media_type` is None it is derived from
        the filename; `EPUBError` is raised if that is not possible.
        """
        super().__init__(file, info)
        if media_type is None:
            media_type = MediaType.from_filename(self.zipinfo.filename)
            if media_type is None:
                raise EPUBError(
                    f"Cannot determine media type of {self.zipinfo.filename}"
                )

        self.media_type: MediaType = MediaType(media_type)

    @classmethod
    @override
    def from_path(
        cls,
        filename: str | Path,
        location: str | Path,
        media_type: MediaType | str | None = None,
    ):
        """
        Create a resource from the file at `filename` on disk, to be stored
        under `location`.

        An explicit `media_type` is handed to the constructor, so it also
        works for files whose media type cannot be derived from the
        filename. If the constructor raises, the opened file is closed
        before the exception propagates.
        """
        zipinfo = ZipInfo.from_file(filename, location, strict_timestamps=False)
        file = open(filename, "rb")
        try:
            if media_type is None:
                # Same two-argument call the base `from_path` makes.
                return cls(file, zipinfo)
            return cls(file, zipinfo, media_type)
        except BaseException:
            # Nobody else holds a reference to the handle yet; do not leak it.
            file.close()
            raise

    @property
    def is_foreign(self):
        """Whether the media type's category is `Category.FOREIGN`."""
        return self.media_type.category is Category.FOREIGN

    @property
    def category(self):
        """The category of this resource's media type."""
        return self.media_type.category

    @classmethod
    def from_resource(cls, other: Resource, media_type: MediaType | str | None = None):
        """
        Create an instance of this class from `other`, reusing its file
        object and `ZipInfo`. Raises `ClosedEPUBError` if `other` is closed.
        """
        if other.closed:
            raise ClosedEPUBError(f"Using resource {other} after closing")

        return cls(other._file, other.zipinfo, media_type)

197 

198 

199class ContentDocument[S: bs4.BeautifulSoup = bs4.BeautifulSoup]( # type: ignore[reportUnsafeMultipleInheritance] 

200 PublicationResource, 

201 XMLResource[S], 

202): 

203 """ 

204 A publication resource referenced from the spine or a manifest fallback 

205 chain that conforms to either the XHTML or SVG content document definitions. 

206 """ 

207 

208 @override 

209 def get_title(self): 

210 if self.soup.h1 and self.soup.h1.string: 

211 return self.soup.h1.string 

212 

213 if self.soup.title and self.soup.title.string: 

214 return self.soup.title.string 

215 

216 if self.soup.body: 

217 string = self.soup.body.find(string=True) 

218 else: 

219 string = self.soup.find(string=True) 

220 

221 if string: 

222 return string 

223 

224 return ""