Coverage for phml\utilities\transform\sanitize\schema.py: 100%

16 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-04-12 14:26 -0500

1"""Defines the schema on how to sanitize the phml ast.""" 

2from __future__ import annotations 

3 

4from dataclasses import dataclass, field 

5 

6 

7def _extend_dict_dict_( 

8 origin: dict[str, dict], new: dict[str, dict] 

9) -> dict[str, dict]: 

10 for key, value in new.items(): 

11 if key not in origin: 

12 origin[key] = value 

13 else: 

14 origin[key].update(value) 

15 

16 return origin 

17 

18 

19def _extend_dict_list_( 

20 origin: dict[str, list], new: dict[str, list] 

21) -> dict[str, list]: 

22 for key, value in new.items(): 

23 if key not in origin: 

24 origin[key] = value 

25 else: 

26 origin[key].extend([item for item in value if item not in origin[key]]) 

27 

28 return origin 

29 

30 

@dataclass
class Schema:
    """Dataclass of information on how to sanitize a phml tree.

    `strip (list[str])`: The elements to strip from the tree.
    `ancestors (dict[str, list])`: Key is an element tag and the value is a list of valid
    parent elements.
    `protocols (dict[str, list])`: Collection of names and their allowed protocol value list.
    The defaults are keyed by names such as `href` and `src`.
    `tag_names (list[str])`: List of allowed tag names.
    `attributes (dict[str, list[str | tuple[str | bool, ...]]])`: Collection of element name and
    allowed property names. An entry is either a plain name or a tuple starting with the name.
    `required (dict[str, dict[str, str | bool]])`: Collection of element names and their required
    properties and required property values.
    """

    # Every default is produced by a factory so that instances never share
    # the same mutable list/dict objects.
    strip: list[str] = field(default_factory=lambda: ["script"])
    ancestors: dict[str, list] = field(
        default_factory=lambda: {
            "tbody": ["table"],
            "tfoot": ["table"],
            "thead": ["table"],
            "td": ["table"],
            "th": ["table"],
            "tr": ["table"],
        },
    )
    protocols: dict[str, list] = field(
        default_factory=lambda: {
            "href": ["http", "https", "mailto", "xmpp", "irc", "ircs"],
            "cite": ["http", "https"],
            "src": ["http", "https"],
            "longDesc": ["http", "https"],
        },
    )
    tag_names: list[str] = field(
        default_factory=lambda: [
            "h1",
            "h2",
            "h3",
            "h4",
            "h5",
            "h6",
            "br",
            "b",
            "i",
            "strong",
            "em",
            "a",
            "pre",
            "code",
            "img",
            "tt",
            "div",
            "ins",
            "del",
            "sup",
            "sub",
            "p",
            "ol",
            "ul",
            "table",
            "thead",
            "tbody",
            "tfoot",
            "blockquote",
            "dl",
            "dt",
            "dd",
            "kbd",
            "q",
            "samp",
            "var",
            "hr",
            "ruby",
            "rt",
            "rp",
            "li",
            "tr",
            "td",
            "th",
            "s",
            "strike",
            "summary",
            "details",
            "caption",
            "figure",
            "figcaption",
            "abbr",
            "bdo",
            "cite",
            "dfn",
            "mark",
            "small",
            "span",
            "time",
            "wbr",
            "input",
        ],
    )
    # NOTE(review): the "*" key presumably applies to every element, and a
    # tuple entry presumably pairs a property name with its allowed values;
    # confirm against the sanitizer that consumes this schema.
    attributes: dict[str, list[str | tuple[str | bool, ...]]] = field(
        default_factory=lambda: {
            "a": ["href"],
            "article": ["class"],
            "img": ["src", "longDesc", "loading"],
            "input": [("type", "checkbox"), ("disabled", True)],
            "li": [("class", "task-list-item")],
            "div": ["itemScope", "itemType"],
            "blockquote": ["cite"],
            "del": ["cite"],
            "ins": ["cite"],
            "q": ["cite"],
            "*": [
                "abbr",
                "accept",
                "acceptCharset",
                "accessKey",
                "action",
                "align",
                "alt",
                "ariaDescribedBy",
                "ariaHidden",
                "ariaLabel",
                "ariaLabelledBy",
                "axis",
                "border",
                "cellPadding",
                "cellSpacing",
                "char",
                "charOff",
                "charSet",
                "checked",
                "clear",
                "cols",
                "colSpan",
                "color",
                "compact",
                "coords",
                "dateTime",
                "dir",
                "disabled",
                "encType",
                "htmlFor",
                "frame",
                "headers",
                "height",
                "hrefLang",
                "hSpace",
                "isMap",
                "id",
                "label",
                "lang",
                "maxLength",
                "media",
                "method",
                "multiple",
                "name",
                "noHref",
                "noShade",
                "noWrap",
                "open",
                "prompt",
                "readOnly",
                "rel",
                "rev",
                "rows",
                "rowSpan",
                "rules",
                "scope",
                "selected",
                "shape",
                "size",
                "span",
                "start",
                "summary",
                "tabIndex",
                "target",
                "title",
                "type",
                "useMap",
                "vAlign",
                "value",
                "vSpace",
                "width",
                "itemProp",
            ],
        },
    )
    required: dict[str, dict[str, str | bool]] = field(
        default_factory=lambda: {
            "input": {
                "type": "checkbox",
                "disabled": True,
            },
        },
    )

    def extend(
        self,
        strip: list[str] | None = None,
        ancestors: dict[str, list[str]] | None = None,
        protocols: dict[str, list[str]] | None = None,
        tag_names: list[str] | None = None,
        attributes: dict[str, list[str | tuple[str | bool, ...]]] | None = None,
        required: dict[str, dict[str, str | bool]] | None = None,
    ) -> Schema:
        """Build a new schema from this schema's values plus the given additions.

        This schema is left unmodified. `strip` and `tag_names` keep their current
        order, with new values appended and duplicates removed.

        Args:
            `strip (list[str])`: The elements to strip from the tree.
            `ancestors (dict[str, list[str]])`: Key is an element tag and the value is a list of
            valid parent elements.
            `protocols (dict[str, list[str]])`: Collection of names to list of valid protocols
            (prefixes).
            `tag_names (list[str])`: List of allowed tag names.
            `attributes (dict[str, list[str | tuple[str | bool, ...]]])`: Collection of element
            name and allowed property names.
            `required (dict[str, dict[str, str | bool]])`: Collection of element names and their
            required properties and required property values.

        Returns:
            A new `Schema` holding the combined values.
        """

        # `dict.fromkeys` removes duplicates while keeping first-seen order;
        # going through `set` made the resulting order arbitrary between runs.
        # The nested lists/dicts are copied one level deep before merging so the
        # merge helpers can never modify containers owned by this schema.
        return Schema(
            strip=list(dict.fromkeys([*self.strip, *(strip or [])])),
            ancestors=_extend_dict_list_(
                {key: list(value) for key, value in self.ancestors.items()},
                ancestors or {},
            ),
            protocols=_extend_dict_list_(
                {key: list(value) for key, value in self.protocols.items()},
                protocols or {},
            ),
            attributes=_extend_dict_list_(
                {key: list(value) for key, value in self.attributes.items()},
                attributes or {},
            ),
            tag_names=list(dict.fromkeys([*self.tag_names, *(tag_names or [])])),
            required=_extend_dict_dict_(
                {key: dict(value) for key, value in self.required.items()},
                required or {},
            ),
        )