Coverage for src/epublib/package/metadata.py: 91%

201 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-09-18 16:07 -0300

1from abc import ABC 

2from dataclasses import dataclass 

3from datetime import datetime 

4from typing import ClassVar, Self, override 

5 

6import bs4 

7 

8from epublib.exceptions import EPUBError, warn 

9from epublib.util import datetime_to_str 

10from epublib.xml_element import ValueType, XMLElement, XMLParent 

11 

12 

class EPUBMetadataError(EPUBError):
    """Error signalling a problem encountered while parsing EPUB metadata."""

15 

16 

@dataclass(kw_only=True)
class MetadataItem(XMLElement, ABC):
    """Common abstract parent of every kind of EPUB metadata item."""

    @classmethod
    def detect(cls, tag: bs4.Tag):
        """Build the metadata item whose concrete class matches ``tag``.

        The checks run in a fixed order and the first match wins:

        1. ``link`` element with a non-empty ``href`` -> ``LinkMetadataItem``
        2. element whose prefix is ``dc`` -> ``DublinCoreMetadataItem``
        3. ``meta`` element with a non-empty ``content`` -> ``OPF2MetadataItem``
        4. ``meta`` element with a non-empty ``property`` and a non-empty
           ``tag.string`` -> ``GenericMetadataItem``

        Raises:
            ValueError: If ``tag`` matches none of the shapes above.
        """
        element = tag.name

        if element == "link" and tag.get("href"):
            return LinkMetadataItem.from_tag(tag)

        if tag.prefix == "dc":
            return DublinCoreMetadataItem.from_tag(tag)

        if element == "meta":
            # ``content`` is tested before ``property``, so a ``meta`` element
            # carrying both is handed to the OPF2 class.
            if tag.get("content"):
                return OPF2MetadataItem.from_tag(tag)
            if tag.get("property") and tag.string:
                return GenericMetadataItem.from_tag(tag)

        raise ValueError(f"{tag.name} is not a metadata item")

32 

33 

@dataclass(kw_only=True)
class LinkMetadataItem(MetadataItem):
    """A link metadata item, used for linking to resources.

    The item's ``name`` corresponds to the ``href`` attribute of the XML
    element (see ``obj_to_tag``).
    """

    # 'name' corresponds to href in the xml
    hreflang: str | None = None
    media_type: str | None = None
    properties: str | None = None
    refines: str | None = None
    rel: str | None = None

    obj_to_tag: ClassVar[dict[str, str]] = {"name": "href"}

    @property
    @override
    def tag_name(self):
        """Element name used for this item: always ``"link"``."""
        return "link"

    @classmethod
    @override
    def from_tag(cls, tag: bs4.Tag, **kwargs: str) -> Self:
        """Create a link metadata item from a ``link`` element.

        Args:
            tag: The element to wrap; it must be named ``link`` and carry a
                non-empty ``href`` attribute.
            **kwargs: Forwarded unchanged to the parent ``from_tag``.

        Raises:
            ValueError: If ``tag`` is not a ``link`` element or its ``href``
                attribute is missing or empty.
        """
        # ``tag.get`` instead of ``tag["href"]``: a missing attribute has to be
        # reported as ValueError like every other rejection, not leak KeyError.
        if tag.name != "link" or not tag.get("href"):
            raise ValueError(f"{tag.name} is not a link metadata item")

        return super().from_tag(tag, **kwargs)

59 

60 

@dataclass
class ValuedMetadataItem(MetadataItem, ABC):
    """Abstract parent of the metadata items that carry a value.

    That is every kind of metadata item except LinkMetadataItem.
    """

    # The value held by the item (required).
    value: str
    # Optional identifier of the item; absent by default.
    id: str | None = None

67 

68 

@dataclass
class DublinCoreMetadataItem(ValuedMetadataItem):
    """A Dublin Core metadata item (an element with the ``dc`` prefix).

    The value is kept as the string content of the element rather than in
    an attribute, which is why ``create_tag`` and ``update_tag`` handle it
    explicitly.
    """

    dir: str | None = None
    lang: str | None = None

    obj_to_tag: ClassVar[dict[str, str]] = {"lang": "xml:lang"}
    exclude_from_tag: ClassVar[list[str]] = ["tag", "name", "value"]

    @property
    @override
    def tag_name(self):
        """Element name for this item: the item's name with a ``dc:`` prefix."""
        return f"dc:{self.name}"

    @override
    def create_tag(self, soup: bs4.BeautifulSoup, **kwargs: str) -> bs4.Tag:
        """Create the element via the parent and set its string to the value."""
        tag = super().create_tag(soup, **kwargs)
        tag.string = self.value

        return tag

    @classmethod
    @override
    def from_tag(cls, tag: bs4.Tag, **kwargs: str) -> Self:
        """Create a Dublin Core item from an element with the ``dc`` prefix.

        The item's name is taken from ``tag.name`` and its value from
        ``tag.string`` (an empty string when ``tag.string`` is ``None``).

        Args:
            tag: The element to wrap; its prefix must be ``dc``.
            **kwargs: Extra keyword arguments forwarded to the parent
                ``from_tag``. ``name`` and ``value`` derived from ``tag``
                take precedence over same-named entries.

        Raises:
            ValueError: If the prefix of ``tag`` is not ``dc``.
        """
        if tag.prefix != "dc":
            raise ValueError(f"{tag.name} is no Dublin Core metadata item")

        text = tag.string
        # Forward caller-supplied keyword arguments instead of silently
        # dropping them, as the sibling ``from_tag`` implementations do. The
        # tag-derived entries come last so they cannot be overridden.
        fields = {
            **kwargs,
            "name": tag.name,
            "value": text if text is not None else "",
        }

        return super().from_tag(tag, **fields)

    @override
    def update_tag(self, field: str, value: ValueType | None):
        """Mirror a changed field onto the underlying element.

        ``name`` (when given as a string) renames the element and ``value``
        replaces its string content, with ``None`` written as an empty
        string. Every other case is delegated to the parent.
        """
        if field == "name" and isinstance(value, str):
            self.tag.name = value
        elif field == "value":
            if value is None:
                self.tag.string = ""
            else:
                self.tag.string = self.value_to_str(field, value)
        else:
            super().update_tag(field, value)

113 

114 

@dataclass
class OPF2MetadataItem(ValuedMetadataItem):
    """An OPF2 metadata item: a ``meta`` element with ``name`` and ``content``."""

    obj_to_tag: ClassVar[dict[str, str]] = {"value": "content"}

    @property
    @override
    def tag_name(self):
        """Element name used for this item: always ``"meta"``."""
        return "meta"

    @classmethod
    @override
    def from_tag(cls, tag: bs4.Tag, **kwargs: str) -> Self:
        """Create an OPF2 item from a ``meta`` element.

        The element must be named ``meta``, have either no prefix or the
        ``opf`` prefix, and carry non-empty ``content`` and ``name``
        attributes.

        Raises:
            ValueError: If any of those requirements is not met.
        """
        is_opf_meta = tag.name == "meta" and tag.prefix in (None, "opf")
        has_pair = bool(tag.get("content")) and bool(tag.get("name"))

        if not (is_opf_meta and has_pair):
            raise ValueError(f"{tag.name} is not OPF2 metadata item")

        return super().from_tag(tag, **kwargs)

137 

138 

@dataclass
class GenericMetadataItem(ValuedMetadataItem):
    """A generic metadata item: a ``meta`` element with a ``property``.

    The item's name corresponds to the ``property`` attribute (see
    ``obj_to_tag``) and its value to the string content of the element.
    """

    dir: str | None = None
    lang: str | None = None
    refines: str | None = None
    scheme: str | None = None

    obj_to_tag: ClassVar[dict[str, str]] = {"name": "property", "lang": "xml:lang"}
    exclude_from_tag: ClassVar[list[str]] = ["tag", "value"]

    @property
    @override
    def tag_name(self):
        """Element name used for this item: always ``"meta"``."""
        return "meta"

    @override
    def create_tag(self, soup: bs4.BeautifulSoup, **kwargs: str) -> bs4.Tag:
        """Create the element via the parent and set its string to the value."""
        element = super().create_tag(soup, **kwargs)
        element.string = self.value
        return element

    @classmethod
    @override
    def from_tag(cls, tag: bs4.Tag, **kwargs: str) -> Self:
        """Create a generic item from a ``meta`` element with a ``property``.

        The value is ``tag.string``, or an empty string when that is ``None``.

        Raises:
            ValueError: If ``tag`` is not named ``meta`` or has no non-empty
                ``property`` attribute.
        """
        if not (tag.name == "meta" and tag.get("property")):
            raise ValueError(f"{tag.name} is not generic metadata item")

        text = tag.string
        return super().from_tag(tag, value="" if text is None else text, **kwargs)

    @override
    def update_tag(self, field: str, value: ValueType | None):
        """Mirror a changed field onto the underlying element.

        ``value`` replaces the string content of the element, with ``None``
        written as an empty string; any other field is delegated to the
        parent.
        """
        if field != "value":
            super().update_tag(field, value)
            return

        self.tag.string = "" if value is None else self.value_to_str(field, value)

180 

181 

class BookMetadata(XMLParent[MetadataItem]):
    """The EPUB metadata, which contains information about the book.

    Besides the generic item handling of the parent class, it offers
    ``add``/``add_dc`` shortcuts and properties for the ``identifier``,
    ``title``, ``language`` and ``dcterms:modified`` items.
    """

    default_item_type: type[MetadataItem] = MetadataItem
    tag_name: str | None = "metadata"

    @override
    def create_items(self) -> list[MetadataItem]:
        """Build one metadata item per child element of the metadata tag.

        Children that are not ``bs4.Tag`` instances are ignored. A child
        that cannot be turned into an item triggers a warning and is
        skipped, so one unsupported element does not abort parsing.
        """
        items: list[MetadataItem] = []
        for tag in self.tag.children:
            if not isinstance(tag, bs4.Tag):
                continue
            try:
                items.append(MetadataItem.detect(tag))
            # ``MetadataItem.detect`` and the ``from_tag`` constructors in this
            # module reject tags with ValueError, so it must be caught here
            # for the warn-and-skip path to be reachable.
            except (EPUBMetadataError, ValueError):
                warn(f"Couldn't parse metadata item {tag}")

        return items

    def add(self, name: str, value: str) -> GenericMetadataItem:
        """Add a new generic metadata item and return it."""
        item = GenericMetadataItem(name=name, value=value)
        __ = self.add_item(item)

        return item

    def add_dc(self, name: str, value: str) -> DublinCoreMetadataItem:
        """Add a new Dublin Core metadata item and return it."""
        item = DublinCoreMetadataItem(name=name, value=value)
        __ = self.add_item(item)

        return item

    def _dc_value(self, name: str) -> str | None:
        """Return the value of the Dublin Core item ``name``, or ``None``.

        ``None`` is returned when ``self.get(name)`` yields nothing or yields
        an item that is not a ``DublinCoreMetadataItem``.
        """
        item = self.get(name)
        if item and isinstance(item, DublinCoreMetadataItem):
            return item.value
        return None

    def _set_dc_value(self, name: str, value: str) -> None:
        """Set the value of the Dublin Core item ``name``, adding it if needed.

        A new ``DublinCoreMetadataItem`` is added when ``self.get(name)``
        yields nothing or yields an item of another class.
        """
        item = self.get(name)
        if item and isinstance(item, DublinCoreMetadataItem):
            item.value = value
            return

        __ = self.add_item(DublinCoreMetadataItem(name=name, value=value))

    @property
    def identifier(self) -> str | None:
        """Value of the Dublin Core ``identifier`` item, or ``None``."""
        return self._dc_value("identifier")

    @identifier.setter
    def identifier(self, value: str):
        self._set_dc_value("identifier", value)

    @property
    def title(self) -> str | None:
        """Value of the Dublin Core ``title`` item, or ``None``."""
        return self._dc_value("title")

    @title.setter
    def title(self, value: str):
        self._set_dc_value("title", value)

    @property
    def language(self) -> str | None:
        """Value of the Dublin Core ``language`` item, or ``None``."""
        return self._dc_value("language")

    @language.setter
    def language(self, value: str):
        self._set_dc_value("language", value)

    @property
    def modified(self) -> datetime | None:
        """Parsed value of the ``dcterms:modified`` item.

        Returns ``None`` when there is no generic ``dcterms:modified`` item
        or when its value is not accepted by ``datetime.fromisoformat``.
        """
        item = self.get("dcterms:modified")
        if item and isinstance(item, GenericMetadataItem):
            try:
                return datetime.fromisoformat(item.value)
            except ValueError:
                return None
        return None

    @modified.setter
    def modified(self, value: datetime):
        # The textual form is whatever ``datetime_to_str`` produces.
        str_value = datetime_to_str(value)
        item = self.get("dcterms:modified")
        if item and isinstance(item, GenericMetadataItem):
            item.value = str_value
            return

        item = GenericMetadataItem(name="dcterms:modified", value=str_value)
        __ = self.add_item(item)