Coverage for phml\utils\validate\validate.py: 19%

89 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2022-11-30 09:38 -0600

1from re import match, split, sub 

2 

3from phml.nodes import All_Nodes, Comment, Element, Root, Text 

4 

# Public API of this module: the names exported by a star-import.
__all__ = [
    "validate",
    "parent",
    "literal",
    "generated",
    "has_property",
    "is_heading",
    "is_css_link",
    "is_css_style",
    "is_javascript",
    "is_element",
    "is_event_handler",
    # Public helpers defined further down in this module.
    "is_embedded",
    "is_interactive",
    "is_phrasing",
]

18 

19 

def validate(node: All_Nodes) -> bool:
    """Validate a node based on attributes and type.

    Each check only runs when the corresponding attribute exists on ``node``:

    * ``children``: the node must have a ``type`` of ``'root'`` or
      ``'element'`` and every child must be an instance of ``All_Nodes``.
    * ``properties``: the node's ``type``, when present, must be
      ``'element'`` and every property value must be an ``int`` or ``str``.
    * ``value``: must be a ``str``.

    Args:
        node (All_Nodes): Node to validate.

    Raises:
        AssertionError: On the first check that fails.
    """

    if hasattr(node, "children"):
        if not hasattr(node, "type"):
            raise AssertionError("Node should have a type")
        if node.type not in ["root", "element"]:
            raise AssertionError(
                "Node should have a type of 'root' or 'element' to contain the 'children' attribute"
            )
        for child in node.children:
            if not isinstance(child, All_Nodes):
                raise AssertionError("Children must be a node type")

    if hasattr(node, "properties"):
        # The attribute name must be the string "type"; passing the builtin
        # ``type`` object to hasattr raises TypeError.
        if hasattr(node, "type") and node.type != "element":
            raise AssertionError("Node must be of type 'element' to contain 'properties'")
        for prop in node.properties:
            if not isinstance(node.properties[prop], (int, str)):
                raise AssertionError("Node 'properties' must be of type 'int' or 'str'")

    if hasattr(node, "value"):
        if not isinstance(node.value, str):
            raise AssertionError("Node 'value' must be of type 'str'")

44 

45 

def parent(node: Root | Element) -> bool:
    """Validate a parent node based on attributes and type.

    A parent node must expose ``children``; when its ``type`` is
    ``'element'`` it must also expose ``properties``.

    Args:
        node (Root | Element): Node to validate.

    Raises:
        AssertionError: If ``children`` is missing, or if an element node
            is missing ``properties``.
    """
    if not hasattr(node, "children"):
        raise AssertionError("Parent nodes should have the 'children' attribute")
    if node.type == "element" and not hasattr(node, "properties"):
        raise AssertionError("Parent element node should have the 'properties' attribute.")

52 

53 

def literal(node: Text | Comment) -> bool:
    """Validate a literal node based on attributes.

    Args:
        node (Text | Comment): Node to validate.

    Raises:
        AssertionError: If the node has a ``value`` that is not a ``str``.
    """

    if hasattr(node, "value"):
        # Check the node's value, not the node object itself.
        if not isinstance(node.value, str):
            raise AssertionError("Literal nodes 'value' type should be 'str'")

60 

61 

def generated(node: All_Nodes) -> bool:
    """Checks if a node has been generated. A node is considered
    generated if it does not have a position.

    Args:
        node (All_Nodes): Node to check for position with.

    Returns:
        bool: True when the node has no ``position`` attribute or its
        ``position`` is None; False when a position is present.
    """
    # A missing attribute and an explicit None both mean "no position".
    return getattr(node, "position", None) is None

73 

74 

def is_heading(node) -> bool:
    """Check if an element is a heading.

    Returns:
        bool: True only when ``node.type`` is ``'element'`` and the tag is
        exactly one of ``h1`` through ``h6``.
    """

    # ``\Z`` anchors the end so tags such as ``h1x`` or ``h10`` do not pass.
    return node.type == "element" and match(r"h[1-6]\Z", node.tag) is not None

79 

80 

def is_css_link(node) -> bool:
    """Check if an element is a `link` to a css file.

    True when `node` is a `<link>` element whose `rel` list contains
    `'stylesheet'` and whose `type` is absent, empty, or `'text/css'`.
    """

    # Must be an element with a `link` tag
    if not is_element(node, "link"):
        return False

    # Must have a rel list with stylesheet
    if not has_property(node, "rel"):
        return False
    rel_tokens = split(r" ", sub(r" +", " ", node.properties["rel"]))
    if "stylesheet" not in rel_tokens:
        return False

    # Can have a `type` of `text/css` or empty or no `type`
    if not has_property(node, "type"):
        return True
    return node.properties["type"] in ("text/css", "")

104 

105 

def is_css_style(node) -> bool:
    """Check if an element is a css `style` element.

    True when `node` is a `<style>` element whose `type` is absent,
    empty, or `'text/css'`.
    """

    if not is_element(node, "style"):
        return False
    if not has_property(node, "type"):
        return True
    return node.properties["type"] in ("", "text/css")

120 

121 

def is_javascript(node) -> bool:
    """Check if an element is a javascript `script` element.

    True when `node` is a `<script>` element and one of the following holds:

    * it has a `type` of `text/ecmascript` or `text/javascript` and no `language`
    * it has a `language` of `ecmascript` or `javascript` and no `type`
    * it has neither a `type` nor a `language`
    """
    if not is_element(node, "script"):
        return False

    typed = has_property(node, "type")
    has_language = has_property(node, "language")

    # Carrying both attributes never qualifies.
    if typed and has_language:
        return False
    if typed:
        return node.properties["type"] in ["text/ecmascript", "text/javascript"]
    if has_language:
        return node.properties["language"] in ["ecmascript", "javascript"]
    return True

140 

141 

142def is_element(node, *conditions: str | list) -> bool: 

143 """Checks if the given node is a certain element. 

144 

145 When providing an str it will check that the elements tag matches. 

146 If a list is provided it checks that one of the conditions in the list 

147 passes. 

148 """ 

149 

150 if node.type != "element": 

151 return False 

152 

153 for condition in conditions: 

154 if isinstance(condition, str) and node.tag == condition: 

155 return True 

156 elif isinstance(condition, list): 

157 for c in condition: 

158 if node.tag == c: 

159 return True 

160 return False 

161 

162 

def is_event_handler(attribute: str) -> bool:
    """Tell whether an attribute name looks like an event handler.

    The name qualifies when it begins with `on` and is at least `5`
    characters long.
    """
    if not attribute.startswith("on"):
        return False
    return len(attribute) >= 5

168 

169 

def has_property(node, attribute: str) -> bool:
    """Report whether an element node lists `attribute` in its properties.

    Nodes whose type is not `element` always yield False.
    """
    if node.type != "element":
        return False
    return attribute in node.properties

176 

177 

def is_embedded(node: Element) -> bool:
    """Check to see if an element is an embedded element.

    Embedded Elements:

    * audio
    * canvas
    * embed
    * iframe
    * img
    * MathML math
    * object
    * picture
    * SVG svg
    * video

    Returns:
        True if embedded
    """
    embedded_tags = (
        "audio",
        "canvas",
        "embed",
        "iframe",
        "img",
        "math",
        "object",
        "picture",
        "svg",
        "video",
    )
    # Each tag is passed as its own positional condition.
    return is_element(node, *embedded_tags)

212 

213 

def is_interactive(node: Element) -> bool:
    """Check if the element is intended for user interaction.

    Follows the HTML "interactive content" category.

    Conditions:

    * a (if the href attribute is present)
    * audio, video (if the controls attribute is present)
    * img (if the usemap attribute is present)
    * input (if the type attribute is not in the Hidden state)
    * button, details, embed, iframe, label, select, textarea (always)

    Args:
        node (Element): Node to inspect.

    Returns:
        True if element is interactive, otherwise False.
    """

    if is_element(node, "a"):
        return has_property(node, "href")
    if is_element(node, "audio", "video"):
        return has_property(node, "controls")
    if is_element(node, "img"):
        return has_property(node, "usemap")
    if is_element(node, "input"):
        # A missing type attribute is not the Hidden state, so such an
        # input still counts as interactive.
        return (
            not has_property(node, "type")
            or str(node.properties["type"]).lower() != "hidden"
        )
    # These elements are interactive regardless of their attributes; every
    # other node is explicitly reported as non-interactive.
    return is_element(
        node,
        "button",
        "details",
        "embed",
        "iframe",
        "label",
        "select",
        "textarea",
    )

237 

238 

239def is_phrasing(node: Element) -> bool: 

240 """Check if a node is phrasing text according to 

241 https://html.spec.whatwg.org/#phrasing-content-2. 

242 

243 Phrasing content is the text of the document, as well as elements that mark up that text at the intra-paragraph level. Runs of phrasing content form paragraphs. 

244 

245 * area (if it is a descendant of a map element) 

246 * link (if it is allowed in the body) 

247 * meta (if the itemprop attribute is present) 

248 * map, mark, math, audio, b, bdi, bdo, br, button, canvas, cite, code, data, datalist, del, dfn, em, embed, 

249 i, iframe, img, input, ins, kbd, label, a, abbr, meter, noscript, object, output, picture, 

250 progress, q, ruby, s, samp, script, select, slot, small, span, strong, sub, sup, svg, template, 

251 textarea, time, u, var, video, wbr, text (true) 

252 

253 Returns: 

254 True if the element is phrasing text 

255 """ 

256 

257 if isinstance(node, Text): 

258 return True 

259 elif is_element(node, "area"): 

260 return node.parent is not None and is_element(node.parent, "map") 

261 elif is_element(node, "meta"): 

262 return has_property(node, "itemprop") 

263 elif is_element(node, "link"): 

264 body_ok = [ 

265 "dns-prefetch", 

266 "modulepreload", 

267 "pingback", 

268 "preconnect", 

269 "prefetch", 

270 "preload", 

271 "prerender", 

272 "stylesheet", 

273 ] 

274 

275 if has_property(node, "itemprop"): 

276 return True 

277 elif has_property(node, "rel"): 

278 tokens = node.properties["rel"].split(" ") 

279 for token in tokens: 

280 if token.strip() not in body_ok: 

281 return False 

282 return True 

283 return False 

284 elif is_element( 

285 "node", 

286 "map", 

287 "mark", 

288 "math", 

289 "audio", 

290 "b", 

291 "bdi", 

292 "bdo", 

293 "br", 

294 "button", 

295 "canvas", 

296 "cite", 

297 "code", 

298 "data", 

299 "datalist", 

300 "del", 

301 "dfn", 

302 "em", 

303 "embed", 

304 "i", 

305 "iframe", 

306 "img", 

307 "input", 

308 "ins", 

309 "kbd", 

310 "label", 

311 "a", 

312 "abbr", 

313 "meter", 

314 "noscript", 

315 "object", 

316 "output", 

317 "picture", 

318 "progress", 

319 "q", 

320 "ruby", 

321 "s", 

322 "samp", 

323 "script", 

324 "select", 

325 "slot", 

326 "small", 

327 "span", 

328 "strong", 

329 "sub", 

330 "sup", 

331 "svg", 

332 "template", 

333 "textarea", 

334 "time", 

335 "u", 

336 "var", 

337 "video", 

338 "wbr", 

339 ): 

340 return True