Coverage for src/epublib/package/manifest.py: 98%

121 statements  

« prev     ^ index     » next       coverage.py v7.10.7, created at 2025-10-06 17:34 -0300

1import re 

2from dataclasses import dataclass 

3from pathlib import Path 

4from typing import Annotated, ClassVar, Literal, SupportsIndex, overload, override 

5 

6import bs4 

7 

8from epublib.exceptions import EPUBError 

9from epublib.identifier import EPUBId 

10from epublib.package.spine import SpineItemRef 

11from epublib.resources import Resource 

12from epublib.util import attr_to_str, strip_fragment 

13from epublib.xml_element import ( 

14 HrefElement, 

15 ParentOfHref, 

16 XMLAttribute, 

17 XMLElement, 

18) 

19 

20 

def detect_remote_resources(soup: bs4.BeautifulSoup):
    """Report whether any ``src``/``href`` value in *soup* looks non-local.

    Every tag carrying a ``src`` attribute is inspected first, then every
    tag carrying an ``href`` attribute. A reference counts as a hit when it
    starts with ``/`` or begins with ``<word characters>://``.

    Returns:
        ``True`` on the first matching reference, ``False`` if none match.
    """
    for attribute in ("src", "href"):
        for element in soup.find_all(attrs={attribute: True}):
            reference = attr_to_str(element.get(attribute))
            if reference is None:
                # Nothing usable came back for this attribute; move on.
                continue

            # NOTE(review): ``\w+`` does not match schemes containing
            # "+", "-" or "." (e.g. ``svn+ssh://``) -- confirm that is fine.
            if reference.startswith("/") or re.search(r"^\w+://.*$", reference):
                return True

    return False

33 

34 

def detect_manifest_properties(soup: bs4.BeautifulSoup) -> list[str]:
    """Derive manifest property names from the content of *soup*.

    Each property is included when its probe finds something:

    * ``mathml`` -- a ``math`` element exists,
    * ``remote-resources`` -- ``detect_remote_resources`` reports a hit,
    * ``scripted`` -- a ``script`` element exists,
    * ``switch`` -- an ``epub:switch`` element exists.

    Returns:
        A new list of the matching property names, in the order above.
    """
    # Ordered (property, probe) pairs; the probes are lazy so each lookup
    # runs once, in the same order the properties are reported.
    probes = (
        ("mathml", lambda: soup.find("math")),
        ("remote-resources", lambda: detect_remote_resources(soup)),
        ("scripted", lambda: soup.find("script")),
        ("switch", lambda: soup.find("epub:switch")),
    )
    return [prop for prop, probe in probes if probe()]

51 

52 

@dataclass(kw_only=True)
class ManifestItem(HrefElement):
    """A single ``item`` element of the EPUB manifest.

    ``properties`` is either ``None`` or a list of property names; the
    helper methods below keep it in that shape and call ``update_tag``
    after every change.
    """

    # Identifier of the item; coerced to ``EPUBId`` in ``__post_init__``.
    id: Annotated[EPUBId, XMLAttribute()]
    # Stored under the ``media-type`` XML attribute name.
    media_type: Annotated[str, XMLAttribute("media-type")]
    fallback: Annotated[str | None, XMLAttribute()] = None
    # Stored under the ``media-overlay`` XML attribute name.
    media_overlay: Annotated[str | None, XMLAttribute("media-overlay")] = None
    # ``None`` when the item has no properties at all.
    properties: Annotated[list[str] | None, XMLAttribute()] = None

    tag_name: ClassVar[str] = "item"

    def __post_init__(self):
        """Run the parent initialisation, then coerce ``id`` to ``EPUBId``."""
        super().__post_init__()
        self.id = EPUBId(self.id)

    def add_property(self, prop: str):
        """Add *prop* to ``properties`` unless it is already present.

        ``update_tag`` is called in every case, including when *prop* was
        already in the list.
        """
        if self.properties is None:
            self.properties = []

        already_present = prop in self.properties
        if not already_present:
            self.properties.append(prop)

        # Presumably pushes the new value to the underlying XML tag --
        # ``update_tag`` is defined outside this class.
        self.update_tag("properties", self.properties)

    def has_property(self, prop: str) -> bool:
        """Return ``True`` when *prop* is one of this item's properties."""
        return self.properties is not None and prop in self.properties

    def remove_property(self, prop: str):
        """Remove *prop* from ``properties``; a missing *prop* is not an error.

        Does nothing at all when ``properties`` is ``None``. When the list
        ends up empty it is reset to ``None``; ``update_tag`` is then called
        with the resulting value.
        """
        if self.properties is None:
            return

        if prop in self.properties:
            self.properties.remove(prop)

        if not self.properties:
            # An empty list collapses to "no properties".
            self.properties = None

        self.update_tag("properties", self.properties)

94 

95 

# Anything accepted as a lookup key by ``BookManifest.get``,
# ``BookManifest.__getitem__`` and ``BookManifest.remove``: a filename
# (``str`` or ``Path``), a ``Resource``, a ``SpineItemRef`` or an ``EPUBId``.
type ItemIdentifier = str | Path | Resource | SpineItemRef | EPUBId

97 

98 

class BookManifest(ParentOfHref[ManifestItem]):
    """The EPUB manifest: the collection of ``ManifestItem`` entries.

    Items can be retrieved by position (anything supporting ``__index__``)
    or by any ``ItemIdentifier``: filename, ``Path``, ``Resource``,
    ``EPUBId`` or ``SpineItemRef``.
    """

    def __post_init__(self) -> None:
        """Run the parent initialisation and reset ``_cover_image``."""
        super().__post_init__()
        self._cover_image: ManifestItem | None = None

    @property
    def nav(self):
        """The first item that carries the ``nav`` property.

        Raises:
            EPUBError: If no item in the manifest has the ``nav`` property.
        """
        try:
            return next(item for item in self.items if item.has_property("nav"))
        except StopIteration as error:
            raise EPUBError("No navigation document found in manifest") from error

    @property
    def cover_image(self):
        """The first item carrying ``cover-image``, or ``None`` if absent."""
        for item in self.items:
            if item.has_property("cover-image"):
                return item
        return None

    @override
    def add_item(self, item: ManifestItem) -> ManifestItem:
        """Add *item* after checking it does not clash with existing items.

        Raises:
            EPUBError: If *item* is already in the manifest, or if another
                item has the same id, or the same filename. The id clash is
                reported in preference to the filename clash.
        """
        if item in self.items:
            raise EPUBError(f"Item {item} is already in the manifest")

        # Ids are checked across the whole manifest before filenames so
        # that an id clash always wins over a filename clash.
        id_taken = any(item.id == other.id for other in self.items)
        if id_taken:
            raise EPUBError(f"An item with id {item.id} is already in the manifest")

        filename_taken = any(item.filename == other.filename for other in self.items)
        if filename_taken:
            raise EPUBError(
                f"An item with filename {item.filename} is already in the manifest"
            )

        return super().add_item(item)

    @overload
    def _get_by_id(self, id: EPUBId, raise_error: Literal[True]) -> ManifestItem: ...

    @overload
    def _get_by_id(
        self,
        id: EPUBId,
        raise_error: bool = False,
    ) -> ManifestItem | None: ...

    def _get_by_id(self, id: EPUBId, raise_error: bool = False):
        """Return the first item whose ``id`` equals *id*.

        Returns:
            The matching item, or ``None`` when nothing matches and
            *raise_error* is false.

        Raises:
            KeyError: When nothing matches and *raise_error* is true.
        """
        try:
            return next(item for item in self.items if item.id == id)
        except StopIteration as exception:
            if not raise_error:
                return None
            raise KeyError(id) from exception

    @override
    def __getitem__(
        self,
        name: ItemIdentifier | SupportsIndex,
    ):
        """Look an item up by position or by identifier.

        Index-like keys are delegated to the parent class; every other key
        goes through ``get``.

        Raises:
            KeyError: If an identifier key does not match any item.
        """
        if isinstance(name, SupportsIndex):
            return super().__getitem__(name)

        found = self.get(name)
        if found is not None:
            return found
        raise KeyError(name)

    @override
    def get(  # type: ignore[reportIncompatibleMethodOverride]
        self,
        name: ItemIdentifier,
        cls: type[XMLElement] | None = None,
        ignore_fragment: bool = True,
    ):
        """Return the item identified by *name*, or ``None``.

        ``EPUBId`` and ``SpineItemRef`` keys (the latter via its ``idref``)
        are resolved to an item by id first and then to that item's
        filename; ``Resource`` keys are resolved to their ``filename``. The
        resulting name, with its fragment stripped when *ignore_fragment*
        is true, is handed to the parent ``get`` as a string.

        NOTE(review): *cls* is accepted but not used in this method.
        """
        if isinstance(name, (EPUBId, SpineItemRef)):
            wanted_id = name.idref if isinstance(name, SpineItemRef) else name
            item = self._get_by_id(wanted_id, raise_error=False)
            if item is None:
                return None
            name = item.filename
        elif isinstance(name, Resource):
            name = name.filename

        lookup = strip_fragment(name) if ignore_fragment else name

        # The fragment has already been dealt with above, so the parent is
        # told not to strip it again.
        return super().get(str(lookup), ignore_fragment=False)

    @override
    def remove(self, filename: ItemIdentifier, ignore_fragment: bool = True):
        """Remove the item identified by *filename*, if there is one.

        Returns:
            The result of ``remove_item`` for the matched item, or ``None``
            when no (truthy) item was found.
        """
        item = self.get(filename, ignore_fragment=ignore_fragment)
        if not item:
            return None
        return self.remove_item(item)

205 item = self.get(filename, ignore_fragment=ignore_fragment) 

206 if item: 

207 return self.remove_item(item)