Coverage for src/epublib/package/metadata.py: 91%
201 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-18 16:07 -0300
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-18 16:07 -0300
1from abc import ABC
2from dataclasses import dataclass
3from datetime import datetime
4from typing import ClassVar, Self, override
6import bs4
8from epublib.exceptions import EPUBError, warn
9from epublib.util import datetime_to_str
10from epublib.xml_element import ValueType, XMLElement, XMLParent
class EPUBMetadataError(EPUBError):
    """Raised when EPUB metadata cannot be parsed."""
@dataclass(kw_only=True)
class MetadataItem(XMLElement, ABC):
    """Common abstract ancestor of every kind of EPUB metadata item."""

    @classmethod
    def detect(cls, tag: bs4.Tag):
        """Build the concrete metadata item that matches ``tag``.

        The shapes are tried in a fixed order: a ``link`` element with a
        non-empty ``href``; any element whose prefix is ``dc``; a ``meta``
        element with a non-empty ``content`` attribute; and finally a ``meta``
        element with a non-empty ``property`` attribute and non-empty text.

        Raises:
            ValueError: If ``tag`` matches none of the shapes above (the
                selected ``from_tag`` may also raise it on its own checks).
        """
        element_name = tag.name

        if element_name == "link" and tag.get("href"):
            return LinkMetadataItem.from_tag(tag)

        if tag.prefix == "dc":
            return DublinCoreMetadataItem.from_tag(tag)

        if element_name == "meta":
            # OPF2-style metas carry their value in ``content``; these are
            # checked before the ``property``-based form.
            if tag.get("content"):
                return OPF2MetadataItem.from_tag(tag)
            if tag.get("property") and tag.string:
                return GenericMetadataItem.from_tag(tag)

        raise ValueError(f"{tag.name} is not a metadata item")
@dataclass(kw_only=True)
class LinkMetadataItem(MetadataItem):
    """A ``link`` metadata item, used for linking to resources.

    The ``name`` field (not declared in this class) is mapped to the ``href``
    attribute through ``obj_to_tag``.
    """

    # 'name' corresponds to href in the xml
    hreflang: str | None = None
    media_type: str | None = None
    properties: str | None = None
    refines: str | None = None
    rel: str | None = None

    # Field name -> XML attribute name, for fields whose names differ.
    obj_to_tag: ClassVar[dict[str, str]] = {"name": "href"}

    @property
    @override
    def tag_name(self):
        """Name of the XML element for this item: always ``"link"``."""
        return "link"

    @classmethod
    @override
    def from_tag(cls, tag: bs4.Tag, **kwargs: str) -> Self:
        """Create a link item from an existing ``link`` element.

        Args:
            tag: The element to wrap; it must be named ``link`` and carry a
                non-empty ``href`` attribute.
            **kwargs: Forwarded unchanged to the parent ``from_tag``.

        Raises:
            ValueError: If ``tag`` is not a ``link`` element or has no
                (non-empty) ``href`` attribute.
        """
        # ``tag.get`` rather than ``tag["href"]``: subscripting a missing
        # attribute raises KeyError, which would escape as the wrong
        # exception type for callers expecting ValueError.
        if tag.name != "link" or not tag.get("href"):
            raise ValueError(f"{tag.name} is not a link metadata item")

        return super().from_tag(tag, **kwargs)
@dataclass
class ValuedMetadataItem(MetadataItem, ABC):
    """Abstract base for metadata items that hold a value.

    That is every metadata item kind except ``LinkMetadataItem``.
    """

    # The item's value.
    value: str
    # Optional identifier of the item.
    id: str | None = None
@dataclass
class DublinCoreMetadataItem(ValuedMetadataItem):
    """A Dublin Core metadata item (an element with the ``dc`` prefix).

    The item's value is stored as the element's text and its name is the
    element's local name, so neither is written as an attribute.
    """

    dir: str | None = None
    lang: str | None = None

    # Field name -> XML attribute name, for fields whose names differ.
    obj_to_tag: ClassVar[dict[str, str]] = {"lang": "xml:lang"}
    # Fields that are not written as attributes of the element.
    exclude_from_tag: ClassVar[list[str]] = ["tag", "name", "value"]

    @property
    @override
    def tag_name(self):
        """Qualified element name: the item's name with a ``dc:`` prefix."""
        return f"dc:{self.name}"

    @override
    def create_tag(self, soup: bs4.BeautifulSoup, **kwargs: str) -> bs4.Tag:
        """Create the element via the parent class and set its text to ``value``."""
        tag = super().create_tag(soup, **kwargs)
        tag.string = self.value

        return tag

    @classmethod
    @override
    def from_tag(cls, tag: bs4.Tag, **kwargs: str) -> Self:
        """Create a Dublin Core item from an existing ``dc``-prefixed element.

        Args:
            tag: The element to wrap; its prefix must be ``dc``.
            **kwargs: Extra keyword arguments forwarded to the parent
                ``from_tag``. ``name`` and ``value`` are always taken from
                ``tag`` and override same-named entries given here.

        Raises:
            ValueError: If the prefix of ``tag`` is not ``dc``.
        """
        if tag.prefix != "dc":
            raise ValueError(f"{tag.name} is no Dublin Core metadata item")

        value = tag.string if tag.string is not None else ""

        # Forward caller overrides instead of silently dropping them, as the
        # sibling ``from_tag`` implementations do. Tag-derived name/value are
        # merged last so they keep taking precedence, as before.
        forwarded = {**kwargs, "name": tag.name, "value": value}
        return super().from_tag(tag, **forwarded)

    @override
    def update_tag(self, field: str, value: ValueType | None):
        """Mirror a changed field onto the underlying element.

        ``name`` (when given as a string) renames the element, ``value``
        replaces the element's text (``None`` becomes an empty string), and
        every other case is delegated to the parent class.
        """
        if field == "name" and isinstance(value, str):
            self.tag.name = value
        elif field == "value":
            if value is None:
                self.tag.string = ""
            else:
                self.tag.string = self.value_to_str(field, value)
        else:
            super().update_tag(field, value)
@dataclass
class OPF2MetadataItem(ValuedMetadataItem):
    """An OPF2-style ``meta`` item whose value lives in the ``content`` attribute."""

    # Field name -> XML attribute name, for fields whose names differ.
    obj_to_tag: ClassVar[dict[str, str]] = {"value": "content"}

    @property
    @override
    def tag_name(self):
        """Name of the XML element for this item: always ``"meta"``."""
        return "meta"

    @classmethod
    @override
    def from_tag(cls, tag: bs4.Tag, **kwargs: str) -> Self:
        """Create an OPF2 item from an existing ``meta`` element.

        The element must be named ``meta``, have no prefix or the ``opf``
        prefix, and carry non-empty ``content`` and ``name`` attributes.

        Raises:
            ValueError: If any of those conditions does not hold.
        """
        is_meta = tag.name == "meta"
        prefix_allowed = tag.prefix in [None, "opf"]
        has_content_and_name = bool(tag.get("content") and tag.get("name"))

        if not (is_meta and prefix_allowed and has_content_and_name):
            raise ValueError(f"{tag.name} is not OPF2 metadata item")

        return super().from_tag(tag, **kwargs)
@dataclass
class GenericMetadataItem(ValuedMetadataItem):
    """A generic ``meta`` item: named by ``property``, valued by its text."""

    dir: str | None = None
    lang: str | None = None
    refines: str | None = None
    scheme: str | None = None

    # Field name -> XML attribute name, for fields whose names differ.
    obj_to_tag: ClassVar[dict[str, str]] = {"name": "property", "lang": "xml:lang"}
    # Fields that are not written as attributes of the element.
    exclude_from_tag: ClassVar[list[str]] = ["tag", "value"]

    @property
    @override
    def tag_name(self):
        """Name of the XML element for this item: always ``"meta"``."""
        return "meta"

    @override
    def create_tag(self, soup: bs4.BeautifulSoup, **kwargs: str) -> bs4.Tag:
        """Create the element via the parent class and set its text to ``value``."""
        element = super().create_tag(soup, **kwargs)
        element.string = self.value
        return element

    @classmethod
    @override
    def from_tag(cls, tag: bs4.Tag, **kwargs: str) -> Self:
        """Create a generic item from a ``meta`` element with a ``property``.

        The value is the element's ``string``, or an empty string when that
        is ``None``.

        Raises:
            ValueError: If ``tag`` is not named ``meta`` or has no non-empty
                ``property`` attribute.
        """
        if tag.name != "meta" or not tag.get("property"):
            raise ValueError(f"{tag.name} is not generic metadata item")

        text = tag.string
        if text is None:
            text = ""
        return super().from_tag(tag, value=text, **kwargs)

    @override
    def update_tag(self, field: str, value: ValueType | None):
        """Mirror a changed field onto the underlying element.

        ``value`` replaces the element's text (``None`` becomes an empty
        string); every other field is delegated to the parent class.
        """
        if field != "value":
            super().update_tag(field, value)
            return

        self.tag.string = "" if value is None else self.value_to_str(field, value)
class BookMetadata(XMLParent[MetadataItem]):
    """The EPUB metadata, which contains information about the book."""

    default_item_type: type[MetadataItem] = MetadataItem
    tag_name: str | None = "metadata"

    @override
    def create_items(self) -> list[MetadataItem]:
        """Build one metadata item per child element of the metadata tag.

        Children that are not elements are ignored. An element that cannot be
        recognised as a metadata item is skipped with a warning instead of
        aborting the whole parse.
        """
        items: list[MetadataItem] = []
        for tag in self.tag.children:
            if not isinstance(tag, bs4.Tag):
                continue
            try:
                items.append(MetadataItem.detect(tag))
            # ``detect`` and the ``from_tag`` constructors signal unrecognised
            # elements with ValueError, so it has to be caught here as well;
            # otherwise the warning below is never reached.
            except (EPUBMetadataError, ValueError):
                warn(f"Couldn't parse metadata item {tag}")

        return items

    def add(self, name: str, value: str):
        """Add a new generic metadata item and return it."""
        item = GenericMetadataItem(name=name, value=value)
        __ = self.add_item(item)

        return item

    def add_dc(self, name: str, value: str):
        """Add a new Dublin Core metadata item and return it."""
        item = DublinCoreMetadataItem(name=name, value=value)
        __ = self.add_item(item)

        return item

    def _get_dublin_core_value(self, name: str) -> str | None:
        """Return the value of the Dublin Core item found under ``name``, if any."""
        item = self.get(name)
        if item and isinstance(item, DublinCoreMetadataItem):
            return item.value
        return None

    def _set_dublin_core_value(self, name: str, value: str) -> None:
        """Update the Dublin Core item found under ``name``, or add a new one."""
        item = self.get(name)
        if item and isinstance(item, DublinCoreMetadataItem):
            item.value = value
            return

        __ = self.add_item(DublinCoreMetadataItem(name=name, value=value))

    @property
    def identifier(self) -> str | None:
        """Value of the Dublin Core ``identifier`` item, or ``None`` if there is none."""
        return self._get_dublin_core_value("identifier")

    @identifier.setter
    def identifier(self, value: str):
        self._set_dublin_core_value("identifier", value)

    @property
    def title(self) -> str | None:
        """Value of the Dublin Core ``title`` item, or ``None`` if there is none."""
        return self._get_dublin_core_value("title")

    @title.setter
    def title(self, value: str):
        self._set_dublin_core_value("title", value)

    @property
    def language(self) -> str | None:
        """Value of the Dublin Core ``language`` item, or ``None`` if there is none."""
        return self._get_dublin_core_value("language")

    @language.setter
    def language(self, value: str):
        self._set_dublin_core_value("language", value)

    @property
    def modified(self) -> datetime | None:
        """The ``dcterms:modified`` timestamp.

        Returns ``None`` when no generic item is found under that name or
        when its value cannot be parsed by ``datetime.fromisoformat``.
        """
        item = self.get("dcterms:modified")
        if item and isinstance(item, GenericMetadataItem):
            try:
                return datetime.fromisoformat(item.value)
            except ValueError:
                return None
        return None

    @modified.setter
    def modified(self, value: datetime):
        str_value = datetime_to_str(value)
        item = self.get("dcterms:modified")
        if item and isinstance(item, GenericMetadataItem):
            item.value = str_value
            return

        item = GenericMetadataItem(name="dcterms:modified", value=str_value)
        __ = self.add_item(item)