Coverage for src/epublib/package/metadata.py: 100%

190 statements  

« prev     ^ index     » next       coverage.py v7.10.7, created at 2025-10-07 11:08 -0300

1from abc import ABC, abstractmethod 

2from dataclasses import dataclass 

3from datetime import datetime 

4from typing import Annotated, ClassVar, Self, cast, overload, override 

5 

6import bs4 

7 

8from epublib.exceptions import EPUBError, warn 

9from epublib.media_type import MediaType 

10from epublib.util import attr_to_str, datetime_to_str 

11from epublib.xml_element import ( 

12 AttributeValue, 

13 SyncType, 

14 XMLAttribute, 

15 XMLElement, 

16 XMLParent, 

17) 

18 

19 

@dataclass(kw_only=True)
class MetadataItem(XMLElement, ABC):
    """Common abstract parent of every item stored in the EPUB metadata.

    Concrete subclasses provide ``pk``; ``detect`` picks the right subclass
    for a parsed tag.
    """

    @property
    @abstractmethod
    def pk(self) -> str:
        """Key string for this item (subclasses return the href or the name)."""
        ...

    @classmethod
    def detect(cls, soup: bs4.BeautifulSoup, tag: bs4.Tag):
        """Build the concrete metadata item that corresponds to ``tag``.

        The checks run in this order, and the first match wins:

        1. a ``link`` tag with a non-empty ``href`` -> ``LinkMetadataItem``
        2. a tag whose prefix is ``dc`` -> ``DublinCoreMetadataItem``
        3. a ``meta`` tag with a non-empty ``content`` -> ``OPF2MetadataItem``
        4. a ``meta`` tag with a non-empty ``property`` and a string
           -> ``GenericMetadataItem``

        Raises:
            EPUBError: if none of the checks match (the selected ``from_tag``
                may also raise it when its own validation fails).
        """
        name = tag.name

        if name == "link" and tag.get("href"):
            return LinkMetadataItem.from_tag(soup, tag)

        if tag.prefix == "dc":
            return DublinCoreMetadataItem.from_tag(soup, tag)

        if name == "meta":
            # ``content`` is tested before ``property``, so a tag carrying
            # both is handed to the OPF2 item.
            if tag.get("content"):
                return OPF2MetadataItem.from_tag(soup, tag)
            if tag.get("property") and tag.string:
                return GenericMetadataItem.from_tag(soup, tag)

        raise EPUBError(f"{name} is not a metadata item")

39 

40 

@dataclass(kw_only=True)
class LinkMetadataItem(MetadataItem):
    """A link metadata item, used for linking to resources.

    Every field is mapped to an XML attribute; ``media_type`` is stored under
    the attribute name ``media-type``. Only ``href`` is required.
    """

    href: Annotated[str, XMLAttribute()]
    hreflang: Annotated[str | None, XMLAttribute()] = None
    media_type: Annotated[str | None, XMLAttribute("media-type")] = None
    properties: Annotated[str | None, XMLAttribute()] = None
    refines: Annotated[str | None, XMLAttribute()] = None
    rel: Annotated[str | None, XMLAttribute()] = None

    tag_name: ClassVar[str] = "link"

    @property
    @override
    def pk(self) -> str:
        """Return ``href``, which serves as the key of a link item."""
        return self.href

    @classmethod
    @override
    def from_tag(
        cls,
        soup: bs4.BeautifulSoup,
        tag: bs4.Tag,
        **kwargs: AttributeValue,
    ) -> Self:
        """Create a link item from ``tag`` after validating it.

        Args:
            soup: The document ``tag`` belongs to.
            tag: The candidate tag; it must be named ``link`` and carry a
                non-empty ``href`` attribute.
            **kwargs: Forwarded unchanged to the parent ``from_tag``.

        Raises:
            EPUBError: if ``tag`` is not a ``link`` tag or has no ``href``.
        """
        # ``tag.get`` rather than ``tag["href"]``: a <link> without an href
        # must be reported as an EPUBError like every other invalid tag, not
        # leak a KeyError to the caller.
        if tag.name != "link" or not tag.get("href"):
            raise EPUBError(f"{tag.name} is not a link metadata item")

        return super().from_tag(soup, tag, **kwargs)

71 

72 

@dataclass(kw_only=True)
class ValuedMetadataItem(MetadataItem, ABC):
    """
    Abstract parent of the metadata items that carry a ``name`` and a
    ``value`` (every item type except LinkMetadataItem).
    """

    # Subclasses re-declare ``name`` and ``value`` with the XML mapping that
    # fits their tag layout.
    name: str
    value: str
    id: Annotated[str | None, XMLAttribute()] = None

    @property
    @override
    def pk(self) -> str:
        """Return ``name``, which serves as the key of a valued item."""
        return self.name

88 

89 

@dataclass(kw_only=True)
class DublinCoreMetadataItem(ValuedMetadataItem):
    """A metadata item in the Dublin Core (``dc``) namespace.

    ``name`` is synchronised with the tag name under the ``dc`` prefix and
    ``value`` with the tag's string; ``lang`` maps to ``xml:lang``.
    """

    name: Annotated[str, XMLAttribute(sync=SyncType.NAME, prefix="dc")]
    value: Annotated[str, XMLAttribute(sync=SyncType.STRING)]
    dir: Annotated[str | None, XMLAttribute()] = None
    lang: Annotated[str | None, XMLAttribute("xml:lang")] = None

    @override
    def get_tag_name(self) -> str:
        """Return the prefixed tag name, i.e. ``dc:`` followed by ``pk``."""
        qualified_name = f"dc:{self.pk}"
        return qualified_name

    @classmethod
    @override
    def from_tag(
        cls,
        soup: bs4.BeautifulSoup,
        tag: bs4.Tag,
        **kwargs: AttributeValue,
    ) -> Self:
        """Create a Dublin Core item from ``tag``.

        Raises:
            EPUBError: if the prefix of ``tag`` is not ``dc``.
        """
        if tag.prefix != "dc":
            raise EPUBError(f"{tag.name} is no Dublin Core metadata item")

        return super().from_tag(soup, tag, **kwargs)

115 

116 

@dataclass(kw_only=True)
class OPF2MetadataItem(ValuedMetadataItem):
    """An OPF2 metadata item.

    Both fields live in attributes of a ``meta`` tag: ``name`` in the
    attribute of the same name and ``value`` in ``content``.
    """

    name: Annotated[str, XMLAttribute()]
    value: Annotated[str, XMLAttribute("content")]

    tag_name: ClassVar[str] = "meta"

    @classmethod
    @override
    def from_tag(
        cls,
        soup: bs4.BeautifulSoup,
        tag: bs4.Tag,
        **kwargs: AttributeValue,
    ) -> Self:
        """Create an OPF2 item from ``tag`` after validating it.

        Args:
            soup: The document ``tag`` belongs to.
            tag: The candidate tag; it must be a ``meta`` tag with no prefix
                or the ``opf`` prefix, and carry non-empty ``content`` and
                ``name`` attributes.
            **kwargs: Forwarded unchanged to the parent ``from_tag``. Typed as
                ``AttributeValue`` to match the other ``from_tag`` overrides
                in this module (it was previously narrowed to ``str``).

        Raises:
            EPUBError: if ``tag`` does not satisfy the conditions above.
        """
        if (
            tag.name != "meta"
            or tag.prefix not in [None, "opf"]
            or not (tag.get("content") and tag.get("name"))
        ):
            raise EPUBError(f"{tag.name} is not OPF2 metadata item")

        return super().from_tag(soup, tag, **kwargs)

137 

138 

@dataclass(kw_only=True)
class GenericMetadataItem(ValuedMetadataItem):
    """A generic ``meta`` item: ``property`` attribute plus text content.

    ``name`` is stored in the ``property`` attribute and ``value`` is
    synchronised with the tag's string; ``lang`` maps to ``xml:lang``.
    """

    value: Annotated[str, XMLAttribute(sync=SyncType.STRING)]
    name: Annotated[str, XMLAttribute("property")]
    dir: Annotated[str | None, XMLAttribute()] = None
    lang: Annotated[str | None, XMLAttribute("xml:lang")] = None
    refines: Annotated[str | None, XMLAttribute()] = None
    scheme: Annotated[str | None, XMLAttribute()] = None

    tag_name: ClassVar[str] = "meta"

    @override
    def create_tag(self) -> None:
        """Create the tag through the parent, then set its string to ``value``."""
        super().create_tag()
        self.tag.string = self.value

    @classmethod
    @override
    def from_tag(
        cls,
        soup: bs4.BeautifulSoup,
        tag: bs4.Tag,
        **kwargs: AttributeValue,
    ) -> Self:
        """Create a generic item from ``tag``.

        Raises:
            EPUBError: if ``tag`` is not a ``meta`` tag with a non-empty
                ``property`` attribute.
        """
        is_generic_meta = tag.name == "meta" and tag.get("property")
        if not is_generic_meta:
            raise EPUBError(f"{tag.name} is not generic metadata item")

        return super().from_tag(soup, tag, **kwargs)

169 

170 

171class BookMetadata(XMLParent[MetadataItem]): 

172 """The EPUB metadata, which contains information about the book.""" 

173 

174 default_item_type: type[MetadataItem] = MetadataItem 

175 tag_name: str | None = "metadata" 

176 

177 @override 

178 def parse_items(self) -> list[MetadataItem]: 

179 items: list[MetadataItem] = [] 

180 

181 for tag in self.tag.find_all(True, recursive=False): 

182 try: 

183 items.append(MetadataItem.detect(self.soup, tag)) 

184 except EPUBError: 

185 warn(f"Couldn't parse metadata item {tag}") 

186 

187 return items 

188 

189 @overload 

190 def add[T: ValuedMetadataItem](self, name: str, value: str, cls: type[T]) -> T: ... 

191 

192 @overload 

193 def add(self, name: str, value: str) -> GenericMetadataItem: ... 

194 

195 def add( # type: ignore[reportIncompatibleMethodOverride] 

196 self, 

197 name: str, 

198 value: str, 

199 cls: type[ValuedMetadataItem] = GenericMetadataItem, 

200 ) -> ValuedMetadataItem: 

201 item = cls(soup=self.soup, name=name, value=value) 

202 __ = self.add_item(item) 

203 

204 return item 

205 

206 def add_dc( 

207 self, 

208 name: str, 

209 value: str, 

210 id: str | None = None, 

211 dir: str | None = None, 

212 lang: str | None = None, 

213 ) -> DublinCoreMetadataItem: 

214 item = DublinCoreMetadataItem( 

215 soup=self.soup, 

216 name=name, 

217 value=value, 

218 id=id, 

219 dir=dir, 

220 lang=lang, 

221 ) 

222 __ = self.add_item(item) 

223 return item 

224 

225 def add_opf(self, name: str, value: str, id: str | None = None) -> OPF2MetadataItem: 

226 item = OPF2MetadataItem(soup=self.soup, name=name, value=value, id=id) 

227 __ = self.add_item(item) 

228 return item 

229 

230 def add_link( 

231 self, 

232 href: str, 

233 hreflang: str | None = None, 

234 media_type: str | MediaType | None = None, 

235 properties: str | None = None, 

236 refines: str | None = None, 

237 rel: str | None = None, 

238 ) -> LinkMetadataItem: 

239 item = LinkMetadataItem( 

240 soup=self.soup, 

241 href=href, 

242 hreflang=hreflang, 

243 media_type=MediaType(media_type).value, 

244 properties=properties, 

245 refines=refines, 

246 rel=rel, 

247 ) 

248 return cast(LinkMetadataItem, self.add_item(item)) 

249 

250 @property 

251 def identifier(self): 

252 item = self.get("identifier") 

253 if item and isinstance(item, DublinCoreMetadataItem): 

254 return item.value 

255 return None 

256 

257 @identifier.setter 

258 def identifier(self, value: str): 

259 item = self.get("identifier") 

260 

261 package = self.tag.parent 

262 unique_identifier = None 

263 if package and package.name == "package" and package.get("unique-identifier"): 

264 unique_identifier = attr_to_str(package["unique-identifier"]) 

265 

266 if item and isinstance(item, DublinCoreMetadataItem): 

267 item.value = value 

268 if unique_identifier: 

269 item.tag["id"] = unique_identifier 

270 return 

271 

272 item = DublinCoreMetadataItem( 

273 soup=self.soup, 

274 name="identifier", 

275 value=value, 

276 ) 

277 if unique_identifier: 

278 item.tag["id"] = unique_identifier 

279 

280 __ = self.add_item(item) 

281 

282 @property 

283 def title(self): 

284 item = self.get("title") 

285 if item and isinstance(item, DublinCoreMetadataItem): 

286 return item.value 

287 return None 

288 

289 @title.setter 

290 def title(self, value: str): 

291 item = self.get("title") 

292 if item and isinstance(item, DublinCoreMetadataItem): 

293 item.value = value 

294 return 

295 

296 item = DublinCoreMetadataItem(soup=self.soup, name="title", value=value) 

297 __ = self.add_item(item) 

298 

299 @property 

300 def language(self): 

301 item = self.get("language") 

302 if item and isinstance(item, DublinCoreMetadataItem): 

303 return item.value 

304 return None 

305 

306 @language.setter 

307 def language(self, value: str): 

308 item = self.get("language") 

309 if item and isinstance(item, DublinCoreMetadataItem): 

310 item.value = value 

311 return 

312 

313 item = DublinCoreMetadataItem(soup=self.soup, name="language", value=value) 

314 __ = self.add_item(item) 

315 

316 @property 

317 def modified(self) -> datetime | None: 

318 item = self.get("dcterms:modified") 

319 if item and isinstance(item, GenericMetadataItem): 

320 try: 

321 return datetime.fromisoformat(item.value) 

322 except ValueError: 

323 return None 

324 return None 

325 

326 @modified.setter 

327 def modified(self, value: datetime): 

328 str_value = datetime_to_str(value) 

329 

330 item = self.get("dcterms:modified") 

331 if item and isinstance(item, GenericMetadataItem): 

332 item.value = str_value 

333 return 

334 

335 item = GenericMetadataItem( 

336 soup=self.soup, 

337 name="dcterms:modified", 

338 value=str_value, 

339 ) 

340 __ = self.add_item(item)