Coverage for src/epublib/resources/__init__.py: 86%

133 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-09-18 16:07 -0300

1import io 

2from mimetypes import guess_file_type 

3from pathlib import Path 

4from typing import IO, override 

5from zipfile import ZipInfo 

6 

7import bs4 

8 

9from epublib.exceptions import EPUBError 

10from epublib.mediatype import Category, MediaType 

11from epublib.source import zip_info_now 

12from epublib.util import get_absolute_href 

13 

14 

15def info_to_zipinfo(info: ZipInfo | str | Path) -> ZipInfo: 

16 if isinstance(info, ZipInfo): 

17 return info 

18 

19 return ZipInfo(filename=str(info), date_time=zip_info_now()) 

20 

21 

22class Resource: 

23 """Base class for all resources (i.e. files) in an EPUB file.""" 

24 

25 def __init__(self, file: IO[bytes] | bytes, info: ZipInfo | str | Path) -> None: 

26 self.zipinfo: ZipInfo = info_to_zipinfo(info) 

27 self._file: IO[bytes] | None = ( 

28 io.BytesIO(file) if isinstance(file, bytes) else file 

29 ) 

30 self._content: bytes | None = None 

31 self._closed: bool = False 

32 

33 @classmethod 

34 def from_path(cls, filename: str | Path, location: str | Path): 

35 file = open(filename, "rb") 

36 zipinfo = ZipInfo.from_file(filename, location, strict_timestamps=False) 

37 return cls(file, zipinfo) 

38 

39 @override 

40 def __repr__(self) -> str: 

41 return f"{self.__class__.__name__}({self.filename})" 

42 

43 def on_content_change(self): 

44 pass 

45 

46 @property 

47 def filename(self): 

48 return self.zipinfo.filename 

49 

50 @filename.setter 

51 def filename(self, value: str): 

52 self._set_filename(value) 

53 

54 def _set_filename(self, value: str): 

55 self.zipinfo.filename = value 

56 

57 @property 

58 def content(self) -> bytes: 

59 self.check_closed() 

60 if self._content is None: 

61 if self._file is None: 

62 return b"" 

63 self._content = self._file.read() 

64 __ = self._file.seek(0) 

65 return self._content 

66 

67 @content.setter 

68 def content(self, value: bytes): 

69 self.check_closed() 

70 self._set_content(value) 

71 

72 def _set_content(self, value: bytes, content_change: bool = True): 

73 self._content = value 

74 if content_change: 

75 self.on_content_change() 

76 

77 def free(self): 

78 del self._content 

79 self._content = None 

80 self.on_content_change() 

81 

82 def get_title(self): 

83 return self.filename 

84 

85 def check_closed(self): 

86 if self._closed: 

87 raise EPUBError(f"Using resource {self.filename} after closing") 

88 

89 def close(self): 

90 self.free() 

91 if self._file is not None: 

92 self._file.close() 

93 self._file = None 

94 

95 def href_to_filename[T: (str, Path)](self, href: T) -> T: 

96 return get_absolute_href(self.filename, href) 

97 

98 

99class XMLResource[S: bs4.BeautifulSoup = bs4.BeautifulSoup](Resource): 

100 """A resource that is an XML file.""" 

101 

102 soup_class: type[S] = bs4.BeautifulSoup # type: ignore[reportAssignmentType] 

103 

104 def __init__(self, file: IO[bytes] | bytes, info: ZipInfo | str | Path) -> None: 

105 super().__init__(file, info) 

106 self._soup: None | S = None 

107 

108 @property 

109 def soup(self) -> S: 

110 if self._soup is None: 

111 self._soup = self.soup_class(self.content, "xml") 

112 return self._soup 

113 

114 @soup.setter 

115 def soup(self, value: S): 

116 self._set_soup(value) 

117 

118 def _set_soup(self, value: S): 

119 self._soup = value 

120 

121 @property 

122 @override 

123 def content(self): 

124 if self._soup is not None: 

125 self._set_content(self._soup.encode(), content_change=False) 

126 return super().content 

127 

128 @content.setter 

129 def content(self, value: bytes): 

130 super()._set_content(value) 

131 

132 @override 

133 def on_content_change(self): 

134 super().on_content_change() 

135 del self._soup 

136 self._soup = None 

137 

138 @override 

139 def get_title(self): 

140 if self.soup.title and self.soup.title.string: 

141 return self.soup.title.string 

142 return super().get_title() 

143 

144 

class PublicationResource(Resource):
    """
    A resource that contributes to the logic and rendering of the publication.

    This includes resources like the package document, content documents (XHTML),
    CSS stylesheets, audio, video, images, fonts, and scripts.
    """

    def __init__(
        self,
        file: IO[bytes] | bytes,
        info: ZipInfo | str | Path,
        media_type: MediaType | str | None = None,
    ) -> None:
        """Create the resource and settle its media type.

        Args:
            file: An open binary file object, or raw bytes.
            info: A ``ZipInfo`` or a name for the archive entry.
            media_type: The media type; when omitted it is guessed from the
                filename with ``mimetypes.guess_file_type``.

        Raises:
            EPUBError: If no media type was given and none could be guessed.
        """
        super().__init__(file, info)

        if media_type is None:
            guessed, _encoding = guess_file_type(self.zipinfo.filename)
            if guessed is None:
                raise EPUBError(
                    f"Cannot determine media type of {self.zipinfo.filename}"
                )
            media_type = guessed

        # After coalescing, the value is either a MediaType or a plain str.
        self.media_type: MediaType | str = MediaType.coalesce(media_type)

    @property
    def is_foreign(self):
        """Whether the media type is held as a plain ``str``."""
        return isinstance(self.media_type, str)

    @property
    def category(self):
        """``Category.FOREIGN`` for ``str`` media types, else the type's own category."""
        media_type = self.media_type
        return (
            Category.FOREIGN if isinstance(media_type, str) else media_type.category
        )

    @classmethod
    def from_resource(cls, other: Resource, media_type: str | MediaType | None = None):
        """Build an instance of ``cls`` from an existing, still-open resource.

        The new instance is constructed with ``other``'s file object and
        ``ZipInfo``; both objects are shared, not copied.

        Raises:
            EPUBError: If ``other`` is closed or has no backing file.
        """
        # NOTE(review): only the file object and ZipInfo are handed over;
        # content cached on ``other`` is not. Confirm callers expect that.
        if other._closed or other._file is None:
            raise EPUBError(f"Using resource {other} after closing")

        return cls(other._file, other.zipinfo, media_type)

186 

187 

188class ContentDocument[S: bs4.BeautifulSoup = bs4.BeautifulSoup]( # type: ignore[reportUnsafeMultipleInheritance] 

189 PublicationResource, 

190 XMLResource[S], 

191): 

192 """ 

193 A publication resource referenced from the spine or a manifest fallback 

194 chain that conforms to either the XHTML or SVG content document definitions. 

195 """ 

196 

197 @override 

198 def get_title(self): 

199 if self.soup.h1 and self.soup.h1.string: 

200 return self.soup.h1.string 

201 return super().get_title()