phml.utils.validate.validate
# pylint: disable=missing-module-docstring
from re import match, split, sub

from phml.nodes import All_Nodes, Comment, Element, Root, Text

__all__ = [
    "validate",
    "parent",
    "literal",
    "generated",
    "has_property",
    "is_heading",
    "is_css_link",
    "is_css_style",
    "is_javascript",
    "is_element",
    "is_event_handler",
    "is_embedded",
    "is_interactive",
    "is_phrasing",
]

# `rel` keywords that allow a `<link>` element to appear in the body.
_BODY_OK_LINK_RELS = (
    "dns-prefetch",
    "modulepreload",
    "pingback",
    "preconnect",
    "prefetch",
    "preload",
    "prerender",
    "stylesheet",
)

# Tags that are unconditionally treated as phrasing content by `is_phrasing`.
_PHRASING_TAGS = (
    "map",
    "mark",
    "math",
    "audio",
    "b",
    "bdi",
    "bdo",
    "br",
    "button",
    "canvas",
    "cite",
    "code",
    "data",
    "datalist",
    "del",
    "dfn",
    "em",
    "embed",
    "i",
    "iframe",
    "img",
    "input",
    "ins",
    "kbd",
    "label",
    "a",
    "abbr",
    "meter",
    "noscript",
    "object",
    "output",
    "picture",
    "progress",
    "q",
    "ruby",
    "s",
    "samp",
    "script",
    "select",
    "slot",
    "small",
    "span",
    "strong",
    "sub",
    "sup",
    "svg",
    "template",
    "textarea",
    "time",
    "u",
    "var",
    "video",
    "wbr",
)


def validate(node: All_Nodes) -> bool:
    """Validate a node based on attributes and type.

    Each check only runs when the corresponding attribute is present:

    * `children`: the node must have a `type` of `'root'` or `'element'` and
      every child must be a node instance.
    * `properties`: the node must be of type `'element'` and every property
      value must be an `int` or `str`.
    * `value`: must be a `str`.

    Args:
        node (All_Nodes): Node to validate.

    Returns:
        bool: `True` when every applicable check passes.

    Raises:
        AssertionError: If any check fails.
    """

    if hasattr(node, "children"):
        if not hasattr(node, "type"):
            raise AssertionError("Node should have a type")

        if node.type not in ["root", "element"]:
            raise AssertionError(
                "Node should have a type of 'root' or 'element' to contain the 'children' attribute"
            )

        if not all(isinstance(child, All_Nodes) for child in node.children):
            raise AssertionError("Children must be a node type")

    if hasattr(node, "properties"):
        # The attribute name has to be the string "type"; handing hasattr()
        # the builtin `type` raises TypeError instead of performing the check.
        if hasattr(node, "type") and node.type != "element":
            raise AssertionError("Node must be of type 'element' to contain 'properties'")

        if not all(isinstance(value, (int, str)) for value in node.properties.values()):
            raise AssertionError("Node 'properties' must be of type 'int' or 'str'")

    if hasattr(node, "value") and not isinstance(node.value, str):
        raise AssertionError("Node 'value' must be of type 'str'")

    return True


def parent(node: Root | Element) -> bool:
    """Validate a parent node based on attributes and type.

    Args:
        node (Root | Element): Node expected to be able to hold children.

    Returns:
        bool: `True` when the node passes validation.

    Raises:
        AssertionError: If the node has no `children` attribute, or it is of
            type `'element'` and has no `properties` attribute.
    """
    if not hasattr(node, "children"):
        raise AssertionError("Parent nodes should have the 'children' attribute")

    if node.type == "element" and not hasattr(node, "properties"):
        raise AssertionError("Parent element node should have the 'properties' element.")

    return True


def literal(node: Text | Comment) -> bool:
    """Validate a literal node based on attributes.

    Args:
        node (Text | Comment): Node whose `value`, when present, is checked.

    Returns:
        bool: `True` when the node passes validation.

    Raises:
        AssertionError: If the node has a `value` that is not a `str`.
    """

    # The check targets the node's value; the node object itself is never a str.
    if hasattr(node, "value") and not isinstance(node.value, str):
        raise AssertionError("Literal nodes 'value' type should be 'str'")

    return True


def generated(node: All_Nodes) -> bool:
    """Checks if a node has been generated. A node is considered
    generated if it does not have a position.

    Args:
        node (All_Nodes): Node to check for position with.

    Returns:
        bool: `True` when the node has no `position` attribute or its
        `position` is `None`; `False` when a position is present.
    """
    return getattr(node, "position", None) is None


def is_heading(node) -> bool:
    """Check if an element is a heading (`h1` through `h6`)."""

    # `\Z` anchors the end so that tags which merely start with a heading
    # name (for example `h1-card`) are not reported as headings.
    return node.type == "element" and match(r"h[1-6]\Z", node.tag) is not None


def is_css_link(node) -> bool:
    """Check if an element is a `link` to a css file.

    Returns `true` if `node` is a `<link>` element with a `rel` list that
    contains `'stylesheet'` and has no `type`, an empty `type`, or `'text/css'`
    as its `type`
    """

    if not (is_element(node, "link") and has_property(node, "rel")):
        return False

    # Collapse every run of whitespace (tabs and newlines included, not just
    # spaces) before splitting the `rel` value into its tokens.
    rel_tokens = split(r" ", sub(r"\s+", " ", str(node.properties["rel"])).strip())
    if "stylesheet" not in rel_tokens:
        return False

    # Can have a `type` of `text/css` or empty or no `type`
    return not has_property(node, "type") or node.properties["type"] in ("", "text/css")


def is_css_style(node) -> bool:
    """Check if an element is a css `style` element.

    Returns `true` if `node` is a `<style>` element that
    has no `type`, an empty `type`, or `'text/css'` as its `type`.
    """

    if not is_element(node, "style"):
        return False

    return not has_property(node, "type") or node.properties["type"] in ("", "text/css")


def is_javascript(node) -> bool:
    """Check if an element is a javascript `script` element.

    Returns `true` if `node` is a `<script>` element that has a valid JavaScript `type`, has no
    `type` and a valid JavaScript `language`, or has neither.
    """
    if not is_element(node, "script"):
        return False

    # When present, `type` alone decides; `language` is only consulted when
    # there is no `type`.
    if has_property(node, "type"):
        return node.properties["type"] in ("text/ecmascript", "text/javascript")

    if has_property(node, "language"):
        return node.properties["language"] in ("ecmascript", "javascript")

    return True


def is_element(node, *conditions: str | list) -> bool:
    """Checks if the given node is a certain element.

    When providing a str it will check that the element's tag matches.
    If a list is provided it checks that one of the tags in the list
    matches. The node passes when at least one condition passes; with no
    conditions any element node passes.

    Args:
        node: Node to check.
        *conditions (str | list): Tag names, or lists of tag names.

    Returns:
        bool: Whether the node is an element satisfying the conditions.
    """

    if node.type != "element":
        return False

    if not conditions:
        return True

    return any(
        (isinstance(condition, str) and node.tag == condition)
        or (isinstance(condition, list) and node.tag in condition)
        for condition in conditions
    )


def is_event_handler(attribute: str) -> bool:
    """Takes an attribute name and returns true if
    it starts with `on` and its length is `5` or more.
    """
    return attribute.startswith("on") and len(attribute) >= 5


def has_property(node, attribute: str) -> bool:
    """Check to see if an element has a certain property in properties.

    Nodes whose `type` is not `'element'` always yield `False`.
    """
    return node.type == "element" and attribute in node.properties


def is_embedded(node: Element) -> bool:
    """Check to see if an element is an embedded element.

    Embedded Elements:

    * audio
    * canvas
    * embed
    * iframe
    * img
    * MathML math
    * object
    * picture
    * SVG svg
    * video

    Returns:
        True if embedded
    """

    return is_element(
        node,
        "audio",
        "canvas",
        "embed",
        "iframe",
        "img",
        "math",
        "object",
        "picture",
        "svg",
        "video",
    )


def is_interactive(node: Element) -> bool:
    """Check if the element is intended for user interaction.

    Conditions:

    * a (if the href attribute is present)
    * audio, video (if the controls attribute is present)
    * img (if the usemap attribute is present)
    * input (if the type attribute is not in the Hidden state)
    * button, details, embed, iframe, label, select, textarea (always)

    Returns:
        True if element is interactive
    """

    if is_element(node, "a"):
        return has_property(node, "href")

    if is_element(node, "audio", "video"):
        return has_property(node, "controls")

    if is_element(node, "img"):
        return has_property(node, "usemap")

    if is_element(node, "input"):
        # An input without a `type` is not in the Hidden state.
        return (
            not has_property(node, "type")
            or str(node.properties["type"]).lower() != "hidden"
        )

    return is_element(
        node, "button", "details", "embed", "iframe", "label", "select", "textarea"
    )


def is_phrasing(node: Element) -> bool:
    """Check if a node is phrasing text according to
    https://html.spec.whatwg.org/#phrasing-content-2.

    Phrasing content is the text of the document, as well as elements that mark up that text at the
    intra-paragraph level. Runs of phrasing content form paragraphs.

    * area (if it is a descendant of a map element)
    * link (if it is allowed in the body)
    * meta (if the itemprop attribute is present)
    * map, mark, math, audio, b, bdi, bdo, br, button, canvas, cite, code, data, datalist, del, dfn,
     em, embed, i, iframe, img, input, ins, kbd, label, a, abbr, meter, noscript, object, output,
     picture, progress, q, ruby, s, samp, script, select, slot, small, span, strong, sub, sup, svg,
     template, textarea, time, u, var, video, wbr, text (true)

    Returns:
        True if the element is phrasing text
    """

    if isinstance(node, Text):
        return True

    if is_element(node, "area"):
        # Only the direct parent is inspected here.
        return node.parent is not None and is_element(node.parent, "map")

    if is_element(node, "meta"):
        return has_property(node, "itemprop")

    if is_element(node, "link"):
        if has_property(node, "itemprop"):
            return True

        if not has_property(node, "rel"):
            return False

        # Whitespace-separated tokens; an empty `rel` never qualifies.
        rel_tokens = str(node.properties["rel"]).split()
        return bool(rel_tokens) and all(token in _BODY_OK_LINK_RELS for token in rel_tokens)

    # Pass the node itself (not the literal string "node") to the tag check.
    return is_element(node, *_PHRASING_TAGS)
def validate(node: All_Nodes) -> bool:
    """Validate a node based on attributes and type.

    Each check only runs when the corresponding attribute is present:

    * `children`: the node must have a `type` of `'root'` or `'element'` and
      every child must be a node instance.
    * `properties`: the node must be of type `'element'` and every property
      value must be an `int` or `str`.
    * `value`: must be a `str`.

    Args:
        node (All_Nodes): Node to validate.

    Returns:
        bool: `True` when every applicable check passes.

    Raises:
        AssertionError: If any check fails.
    """

    if hasattr(node, "children"):
        if not hasattr(node, "type"):
            raise AssertionError("Node should have a type")

        if node.type not in ["root", "element"]:
            raise AssertionError(
                "Node should have a type of 'root' or 'element' to contain the 'children' attribute"
            )

        if not all(isinstance(child, All_Nodes) for child in node.children):
            raise AssertionError("Children must be a node type")

    if hasattr(node, "properties"):
        # The attribute name has to be the string "type"; handing hasattr()
        # the builtin `type` raises TypeError instead of performing the check.
        if hasattr(node, "type") and node.type != "element":
            raise AssertionError("Node must be of type 'element' to contain 'properties'")

        if not all(isinstance(value, (int, str)) for value in node.properties.values()):
            raise AssertionError("Node 'properties' must be of type 'int' or 'str'")

    if hasattr(node, "value") and not isinstance(node.value, str):
        raise AssertionError("Node 'value' must be of type 'str'")

    return True
Validate a node based on attributes and type.
def parent(node: Root | Element) -> bool:
    """Validate a parent node based on attributes and type.

    Args:
        node (Root | Element): Node expected to be able to hold children.

    Returns:
        bool: `True` when the node passes validation.

    Raises:
        AssertionError: If the node has no `children` attribute, or it is of
            type `'element'` and has no `properties` attribute.
    """
    if not hasattr(node, "children"):
        raise AssertionError("Parent nodes should have the 'children' attribute")

    if node.type == "element" and not hasattr(node, "properties"):
        raise AssertionError("Parent element node should have the 'properties' element.")

    return True
Validate a parent node based on attributes and type.
def literal(node: Text | Comment) -> bool:
    """Validate a literal node based on attributes.

    Args:
        node (Text | Comment): Node whose `value`, when present, is checked.

    Returns:
        bool: `True` when the node passes validation.

    Raises:
        AssertionError: If the node has a `value` that is not a `str`.
    """

    # The check targets the node's value; the node object itself is never a str.
    if hasattr(node, "value") and not isinstance(node.value, str):
        raise AssertionError("Literal nodes 'value' type should be 'str'")

    return True
Validate a literal node based on attributes.
def generated(node: All_Nodes) -> bool:
    """Checks if a node has been generated. A node is considered
    generated if it does not have a position.

    Args:
        node (All_Nodes): Node to check for position with.

    Returns:
        bool: `True` when the node has no `position` attribute or its
        `position` is `None`; `False` when a position is present.
    """
    return getattr(node, "position", None) is None
Checks if a node has been generated. A node is considered generated if it does not have a position.
Args
- node (All_Nodes): Node to check for position with.
Returns
bool: Whether a node has a position or not.
def has_property(node, attribute: str) -> bool:
    """Report whether `attribute` is one of an element's properties.

    Nodes whose `type` is not `'element'` always yield `False`.
    """
    return node.type == "element" and attribute in node.properties
Check to see if an element has a certain property in properties.
def is_heading(node) -> bool:
    """Check if an element is a heading (`h1` through `h6`).

    Args:
        node: Node to check; `tag` is only read when `type` is `'element'`.

    Returns:
        bool: `True` when the node is an element whose tag is exactly one of
        `h1`..`h6`.
    """

    # `\Z` anchors the end so that tags which merely start with a heading
    # name (for example `h1-card`) are not reported as headings.
    return node.type == "element" and match(r"h[1-6]\Z", node.tag) is not None
Check if an element is a heading.
def is_css_link(node) -> bool:
    """Check if an element is a `link` to a css file.

    Returns `true` if `node` is a `<link>` element with a `rel` list that
    contains `'stylesheet'` and has no `type`, an empty `type`, or `'text/css'`
    as its `type`
    """

    # Verify it is an element with a `link` tag that carries a `rel`
    if not (is_element(node, "link") and has_property(node, "rel")):
        return False

    # Collapse every run of whitespace (tabs and newlines included, not just
    # spaces) before splitting the `rel` value into its tokens.
    rel_tokens = split(r" ", sub(r"\s+", " ", str(node.properties["rel"])).strip())
    if "stylesheet" not in rel_tokens:
        return False

    # Can have a `type` of `text/css` or empty or no `type`
    return not has_property(node, "type") or node.properties["type"] in ("", "text/css")
Check if an element is a `link` to a css file.
Returns `true` if `node` is a `<link>` element with a `rel` list that contains `'stylesheet'` and has no `type`, an empty `type`, or `'text/css'` as its `type`.
def is_css_style(node) -> bool:
    """Tell whether a node is a `<style>` element holding css.

    The result is `true` for a `<style>` element whose `type` is absent,
    empty, or exactly `'text/css'`; anything else gives `false`.
    """

    if not is_element(node, "style"):
        return False

    # No `type` at all is accepted as css.
    if not has_property(node, "type"):
        return True

    return node.properties["type"] in ("", "text/css")
Check if an element is a css `style` element.
Returns `true` if `node` is a `<style>` element that has no `type`, an empty `type`, or `'text/css'` as its `type`.
def is_javascript(node) -> bool:
    """Check if an element is a javascript `script` element.

    Returns `true` if `node` is a `<script>` element that has a valid JavaScript `type`, has no
    `type` and a valid JavaScript `language`, or has neither.
    """
    if not is_element(node, "script"):
        return False

    # When present, `type` alone decides; a valid `type` is accepted even if
    # a `language` property is also set.
    if has_property(node, "type"):
        return node.properties["type"] in ("text/ecmascript", "text/javascript")

    # `language` is only consulted when there is no `type`.
    if has_property(node, "language"):
        return node.properties["language"] in ("ecmascript", "javascript")

    # Neither property present.
    return True
Check if an element is a javascript `script` element.
Returns `true` if `node` is a `<script>` element that has a valid JavaScript `type`, has no `type` and a valid JavaScript `language`, or has neither.
146def is_element(node, *conditions: str | list) -> bool: 147 """Checks if the given node is a certain element. 148 149 When providing an str it will check that the elements tag matches. 150 If a list is provided it checks that one of the conditions in the list 151 passes. 152 """ 153 154 if node.type != "element": 155 return False 156 157 return bool( 158 node.type == "element" 159 and all( 160 bool( 161 (isinstance(condition, str) and node.tag == condition) 162 or (isinstance(condition, list) and all(node.tag == nested for nested in condition)) 163 ) 164 for condition in conditions 165 ) 166 )
Checks if the given node is a certain element.
When providing a str it will check that the element's tag matches. If a list is provided it checks that one of the conditions in the list passes.
def is_event_handler(attribute: str) -> bool:
    """Decide whether an attribute name looks like an event handler.

    The name qualifies when it begins with `on` and is at least `5`
    characters long.
    """
    if not attribute.startswith("on"):
        return False

    return len(attribute) >= 5
Takes an attribute name and returns true if it starts with `on` and its length is `5` or more.