Coverage for src/epublib/identifier.py: 100%

20 statements  

« prev     ^ index     » next       coverage.py v7.10.7, created at 2025-10-07 09:55 -0300

1import re 

2from typing import Self 

3 

4start = ( 

5 r":A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D" 

6 r"\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF" 

7 r"\uF900-\uFDCF\uFDF0-\uFFFD\U00010000-\U000EFFFF" 

8) 

9name = start + r"-.0-9\u00B7\u0300-\u036F\u203F-\u2040" 

10 

11valid_id_pattern = re.compile(f"^[{start}][{name}]*$") 

12 

13 

14class EPUBId(str): 

15 """A unique identifier for a resource within an EPUB file. 

16 Use this class to reference epub resources throughout the library 

17 using it's manifest id rather than its complete filename. 

18 """ 

19 

20 @classmethod 

21 def is_valid(cls, value: str) -> bool: 

22 """Check if the identifier is valid according to the XML specification. 

23 

24 Returns: 

25 bool: True if the identifier is valid, False otherwise. 

26 """ 

27 return bool(valid_id_pattern.match(value)) 

28 

29 @property 

30 def valid(self) -> bool: 

31 return self.is_valid(self) 

32 

33 @classmethod 

34 def to_valid(cls, value: str) -> Self: 

35 """Convert a string to a valid EPUBId by replacing invalid characters with underscores. 

36 

37 Args: 

38 value (str): The string to convert. 

39 

40 Returns: 

41 EPUBId: A valid EPUBId instance. 

42 """ 

43 if not value: 

44 raise ValueError("Identifier cannot be empty.") 

45 

46 # Replace invalid starting characters 

47 if not re.match(f"^[{start}]", value[0]): 

48 value = "_" + value[1:] 

49 

50 # Replace invalid characters in the rest of the string 

51 value = re.sub(f"[^{name}]", "_", value) 

52 

53 return cls(value)