Coverage for src/epublib/package/metadata.py: 100%
190 statements
« prev ^ index » next coverage.py v7.10.7, created at 2025-10-07 11:08 -0300
« prev ^ index » next coverage.py v7.10.7, created at 2025-10-07 11:08 -0300
1from abc import ABC, abstractmethod
2from dataclasses import dataclass
3from datetime import datetime
4from typing import Annotated, ClassVar, Self, cast, overload, override
6import bs4
8from epublib.exceptions import EPUBError, warn
9from epublib.media_type import MediaType
10from epublib.util import attr_to_str, datetime_to_str
11from epublib.xml_element import (
12 AttributeValue,
13 SyncType,
14 XMLAttribute,
15 XMLElement,
16 XMLParent,
17)
@dataclass(kw_only=True)
class MetadataItem(XMLElement, ABC):
    """Abstract base class for EPUB metadata items."""

    @property
    @abstractmethod
    def pk(self) -> str:
        """String key of the item (``href`` for links, ``name`` for valued items)."""
        ...

    @classmethod
    def detect(cls, soup: bs4.BeautifulSoup, tag: bs4.Tag) -> "MetadataItem":
        """Build the concrete metadata item matching ``tag``.

        The checks run in this order:

        1. ``<link>`` with an ``href``             -> ``LinkMetadataItem``
        2. any tag whose prefix is ``dc``          -> ``DublinCoreMetadataItem``
        3. ``<meta>`` with ``content`` and ``name`` -> ``OPF2MetadataItem``
        4. ``<meta>`` with ``property`` and text   -> ``GenericMetadataItem``

        Args:
            soup: The document the tag belongs to; forwarded to ``from_tag``.
            tag: The candidate element.

        Returns:
            The item produced by the matching subclass' ``from_tag``.

        Raises:
            EPUBError: If ``tag`` matches none of the shapes above.
        """
        if tag.name == "link" and tag.get("href"):
            return LinkMetadataItem.from_tag(soup, tag)
        if tag.prefix == "dc":
            return DublinCoreMetadataItem.from_tag(soup, tag)
        if tag.name == "meta":
            # OPF2MetadataItem.from_tag requires both attributes. Testing only
            # ``content`` here would reject a <meta> that is a perfectly good
            # generic item but happens to carry a stray ``content`` attribute.
            if tag.get("content") and tag.get("name"):
                return OPF2MetadataItem.from_tag(soup, tag)
            if tag.get("property") and tag.string:
                return GenericMetadataItem.from_tag(soup, tag)
        raise EPUBError(f"{tag.name} is not a metadata item")
@dataclass(kw_only=True)
class LinkMetadataItem(MetadataItem):
    """A link metadata item, used for linking to resources."""

    # Every field is an attribute of the <link> element; only ``href`` is
    # required. ``media_type`` is stored under the XML name ``media-type``.
    href: Annotated[str, XMLAttribute()]
    hreflang: Annotated[str | None, XMLAttribute()] = None
    media_type: Annotated[str | None, XMLAttribute("media-type")] = None
    properties: Annotated[str | None, XMLAttribute()] = None
    refines: Annotated[str | None, XMLAttribute()] = None
    rel: Annotated[str | None, XMLAttribute()] = None

    tag_name: ClassVar[str] = "link"

    @property
    @override
    def pk(self) -> str:
        """The key of a link item is its ``href``."""
        return self.href

    @classmethod
    @override
    def from_tag(
        cls,
        soup: bs4.BeautifulSoup,
        tag: bs4.Tag,
        **kwargs: AttributeValue,
    ) -> Self:
        """Create a link item from an existing ``<link>`` tag.

        Args:
            soup: The document the tag belongs to.
            tag: The element to wrap.
            **kwargs: Forwarded unchanged to the parent ``from_tag``.

        Raises:
            EPUBError: If ``tag`` is not a ``<link>`` or has no (non-empty)
                ``href`` attribute.
        """
        # ``tag.get`` instead of ``tag[...]``: indexing a missing attribute
        # raises KeyError, which would bypass the EPUBError contract that
        # callers (and the sibling item classes) rely on.
        if tag.name != "link" or not tag.get("href"):
            raise EPUBError(f"{tag.name} is not a link metadata item")

        return super().from_tag(soup, tag, **kwargs)
@dataclass(kw_only=True)
class ValuedMetadataItem(MetadataItem, ABC):
    """
    Common base for metadata items that carry a name/value pair, i.e.
    every metadata item kind except LinkMetadataItem.
    """

    # How ``name`` and ``value`` map onto the XML element is decided by the
    # concrete subclasses, which re-declare both fields with XMLAttribute
    # annotations.
    name: str
    value: str
    # Optional ``id`` attribute of the element.
    id: Annotated[str | None, XMLAttribute()] = None

    @property
    @override
    def pk(self) -> str:
        """The key of a valued item is its ``name``."""
        return self.name
@dataclass(kw_only=True)
class DublinCoreMetadataItem(ValuedMetadataItem):
    """A metadata item expressed as a Dublin Core (``dc:``) element."""

    # ``name`` is synced with the element name (under the ``dc`` prefix) and
    # ``value`` with the element's string content.
    name: Annotated[str, XMLAttribute(sync=SyncType.NAME, prefix="dc")]
    value: Annotated[str, XMLAttribute(sync=SyncType.STRING)]
    dir: Annotated[str | None, XMLAttribute()] = None
    lang: Annotated[str | None, XMLAttribute("xml:lang")] = None

    @override
    def get_tag_name(self) -> str:
        """Return the qualified element name: ``dc:`` followed by the item's key."""
        key = self.pk
        return f"dc:{key}"

    @classmethod
    @override
    def from_tag(
        cls,
        soup: bs4.BeautifulSoup,
        tag: bs4.Tag,
        **kwargs: AttributeValue,
    ) -> Self:
        """Wrap an existing ``dc:``-prefixed tag.

        Raises:
            EPUBError: If the tag's prefix is not ``dc``.
        """
        if tag.prefix != "dc":
            raise EPUBError(f"{tag.name} is no Dublin Core metadata item")

        return super().from_tag(soup, tag, **kwargs)
@dataclass(kw_only=True)
class OPF2MetadataItem(ValuedMetadataItem):
    """An OPF2-style ``<meta name="..." content="..."/>`` metadata item."""

    # ``name`` maps to the ``name`` attribute, ``value`` to ``content``.
    name: Annotated[str, XMLAttribute()]
    value: Annotated[str, XMLAttribute("content")]

    tag_name: ClassVar[str] = "meta"

    # NOTE(review): sibling overrides annotate ``**kwargs`` as AttributeValue;
    # ``str`` is kept here as in the original -- confirm against the base class.
    @classmethod
    @override
    def from_tag(cls, soup: bs4.BeautifulSoup, tag: bs4.Tag, **kwargs: str) -> Self:
        """Wrap an existing OPF2 ``<meta>`` tag.

        The tag must be named ``meta``, be unprefixed or use the ``opf``
        prefix, and carry non-empty ``content`` and ``name`` attributes.

        Raises:
            EPUBError: If any of those conditions does not hold.
        """
        is_opf_meta = tag.name == "meta" and tag.prefix in (None, "opf")
        has_name_and_content = bool(tag.get("content") and tag.get("name"))

        if not (is_opf_meta and has_name_and_content):
            raise EPUBError(f"{tag.name} is not OPF2 metadata item")

        return super().from_tag(soup, tag, **kwargs)
@dataclass(kw_only=True)
class GenericMetadataItem(ValuedMetadataItem):
    """A generic ``<meta property="...">value</meta>`` metadata item."""

    # ``value`` is synced with the element's string content; ``name`` is
    # stored in the ``property`` attribute.
    value: Annotated[str, XMLAttribute(sync=SyncType.STRING)]
    name: Annotated[str, XMLAttribute("property")]
    dir: Annotated[str | None, XMLAttribute()] = None
    lang: Annotated[str | None, XMLAttribute("xml:lang")] = None
    refines: Annotated[str | None, XMLAttribute()] = None
    scheme: Annotated[str | None, XMLAttribute()] = None

    tag_name: ClassVar[str] = "meta"

    @override
    def create_tag(self) -> None:
        """Create the tag via the parent class, then set its text to ``value``."""
        super().create_tag()
        self.tag.string = self.value

    @classmethod
    @override
    def from_tag(
        cls,
        soup: bs4.BeautifulSoup,
        tag: bs4.Tag,
        **kwargs: AttributeValue,
    ) -> Self:
        """Wrap an existing ``<meta property="...">`` tag.

        Raises:
            EPUBError: If the tag is not named ``meta`` or has no (non-empty)
                ``property`` attribute.
        """
        if tag.name != "meta" or not tag.get("property"):
            raise EPUBError(f"{tag.name} is not generic metadata item")

        return super().from_tag(soup, tag, **kwargs)
171class BookMetadata(XMLParent[MetadataItem]):
172 """The EPUB metadata, which contains information about the book."""
174 default_item_type: type[MetadataItem] = MetadataItem
175 tag_name: str | None = "metadata"
177 @override
178 def parse_items(self) -> list[MetadataItem]:
179 items: list[MetadataItem] = []
181 for tag in self.tag.find_all(True, recursive=False):
182 try:
183 items.append(MetadataItem.detect(self.soup, tag))
184 except EPUBError:
185 warn(f"Couldn't parse metadata item {tag}")
187 return items
189 @overload
190 def add[T: ValuedMetadataItem](self, name: str, value: str, cls: type[T]) -> T: ...
192 @overload
193 def add(self, name: str, value: str) -> GenericMetadataItem: ...
195 def add( # type: ignore[reportIncompatibleMethodOverride]
196 self,
197 name: str,
198 value: str,
199 cls: type[ValuedMetadataItem] = GenericMetadataItem,
200 ) -> ValuedMetadataItem:
201 item = cls(soup=self.soup, name=name, value=value)
202 __ = self.add_item(item)
204 return item
206 def add_dc(
207 self,
208 name: str,
209 value: str,
210 id: str | None = None,
211 dir: str | None = None,
212 lang: str | None = None,
213 ) -> DublinCoreMetadataItem:
214 item = DublinCoreMetadataItem(
215 soup=self.soup,
216 name=name,
217 value=value,
218 id=id,
219 dir=dir,
220 lang=lang,
221 )
222 __ = self.add_item(item)
223 return item
225 def add_opf(self, name: str, value: str, id: str | None = None) -> OPF2MetadataItem:
226 item = OPF2MetadataItem(soup=self.soup, name=name, value=value, id=id)
227 __ = self.add_item(item)
228 return item
230 def add_link(
231 self,
232 href: str,
233 hreflang: str | None = None,
234 media_type: str | MediaType | None = None,
235 properties: str | None = None,
236 refines: str | None = None,
237 rel: str | None = None,
238 ) -> LinkMetadataItem:
239 item = LinkMetadataItem(
240 soup=self.soup,
241 href=href,
242 hreflang=hreflang,
243 media_type=MediaType(media_type).value,
244 properties=properties,
245 refines=refines,
246 rel=rel,
247 )
248 return cast(LinkMetadataItem, self.add_item(item))
250 @property
251 def identifier(self):
252 item = self.get("identifier")
253 if item and isinstance(item, DublinCoreMetadataItem):
254 return item.value
255 return None
257 @identifier.setter
258 def identifier(self, value: str):
259 item = self.get("identifier")
261 package = self.tag.parent
262 unique_identifier = None
263 if package and package.name == "package" and package.get("unique-identifier"):
264 unique_identifier = attr_to_str(package["unique-identifier"])
266 if item and isinstance(item, DublinCoreMetadataItem):
267 item.value = value
268 if unique_identifier:
269 item.tag["id"] = unique_identifier
270 return
272 item = DublinCoreMetadataItem(
273 soup=self.soup,
274 name="identifier",
275 value=value,
276 )
277 if unique_identifier:
278 item.tag["id"] = unique_identifier
280 __ = self.add_item(item)
282 @property
283 def title(self):
284 item = self.get("title")
285 if item and isinstance(item, DublinCoreMetadataItem):
286 return item.value
287 return None
289 @title.setter
290 def title(self, value: str):
291 item = self.get("title")
292 if item and isinstance(item, DublinCoreMetadataItem):
293 item.value = value
294 return
296 item = DublinCoreMetadataItem(soup=self.soup, name="title", value=value)
297 __ = self.add_item(item)
299 @property
300 def language(self):
301 item = self.get("language")
302 if item and isinstance(item, DublinCoreMetadataItem):
303 return item.value
304 return None
306 @language.setter
307 def language(self, value: str):
308 item = self.get("language")
309 if item and isinstance(item, DublinCoreMetadataItem):
310 item.value = value
311 return
313 item = DublinCoreMetadataItem(soup=self.soup, name="language", value=value)
314 __ = self.add_item(item)
316 @property
317 def modified(self) -> datetime | None:
318 item = self.get("dcterms:modified")
319 if item and isinstance(item, GenericMetadataItem):
320 try:
321 return datetime.fromisoformat(item.value)
322 except ValueError:
323 return None
324 return None
326 @modified.setter
327 def modified(self, value: datetime):
328 str_value = datetime_to_str(value)
330 item = self.get("dcterms:modified")
331 if item and isinstance(item, GenericMetadataItem):
332 item.value = str_value
333 return
335 item = GenericMetadataItem(
336 soup=self.soup,
337 name="dcterms:modified",
338 value=str_value,
339 )
340 __ = self.add_item(item)