Coverage for src/epublib/identifier.py: 100%
20 statements
« prev ^ index » next coverage.py v7.10.7, created at 2025-10-07 09:55 -0300
« prev ^ index » next coverage.py v7.10.7, created at 2025-10-07 09:55 -0300
1import re
2from typing import Self
# Interior of a regex character class listing the characters allowed as the
# FIRST character of an XML name (mirrors the XML 1.0 NameStartChar ranges).
start = (
    r":A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D"
    r"\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF"
    r"\uF900-\uFDCF\uFDF0-\uFFFD\U00010000-\U000EFFFF"
)
# Interior of a regex character class for every character after the first:
# all start characters plus "-", ".", digits and a few combining/extender
# ranges (mirrors XML 1.0 NameChar).
name = start + r"-.0-9\u00B7\u0300-\u036F\u203F-\u2040"

# One start character followed by any number of name characters.
# Anchored with \Z rather than $: in Python, $ also matches just before a
# trailing newline, which would wrongly accept identifiers such as "abc\n".
valid_id_pattern = re.compile(rf"^[{start}][{name}]*\Z")
class EPUBId(str):
    """A unique identifier for a resource within an EPUB file.

    Use this class to reference epub resources throughout the library
    using its manifest id rather than its complete filename.
    """

    @classmethod
    def is_valid(cls, value: str) -> bool:
        """Check if the identifier is valid according to the XML specification.

        Args:
            value (str): The candidate identifier to check.

        Returns:
            bool: True if the identifier is valid, False otherwise.
        """
        # Require the WHOLE string to match. Plain ``match`` against a
        # ``$``-anchored pattern would also accept a trailing newline,
        # because ``$`` matches just before a final "\n".
        return valid_id_pattern.fullmatch(value) is not None

    @property
    def valid(self) -> bool:
        """Whether this identifier's own text passes :meth:`is_valid`."""
        return self.is_valid(self)

    @classmethod
    def to_valid(cls, value: str) -> Self:
        """Convert a string to a valid EPUBId by replacing invalid characters with underscores.

        Args:
            value (str): The string to convert.

        Returns:
            EPUBId: A valid EPUBId instance.

        Raises:
            ValueError: If ``value`` is empty.
        """
        if not value:
            raise ValueError("Identifier cannot be empty.")

        # Replace invalid starting characters. The first character has a
        # stricter allowed set than the rest (e.g. digits may not lead).
        if not re.match(f"^[{start}]", value[0]):
            value = "_" + value[1:]

        # Replace invalid characters in the rest of the string
        value = re.sub(f"[^{name}]", "_", value)

        return cls(value)