phml.utilities.validate.validate
"""Predicates for validating and classifying phml nodes."""
from re import match, split, sub
from typing import Any

from phml.nodes import Element, Literal, Node, Parent

__all__ = [
    "validate",
    "generated",
    "is_heading",
    "is_css_link",
    "is_css_style",
    "is_javascript",
    "is_element",
    "is_embedded",
    "is_interactive",
    "is_phrasing",
    "is_event_handler",
    "blank",
]


def validate(node: Node) -> bool:
    """Validate a node based on attributes and type.

    Args:
        node (Node): The node to validate.

    Returns:
        bool: Always True when no check fails.

    Raises:
        AssertionError: If a `Parent` yields a child that is not a `Node`,
            if an `Element` attribute value is neither `bool` nor `str`, or
            if a `Literal` has non-`str` content.
    """

    if isinstance(node, Parent):
        for child in node:
            if not isinstance(child, Node):
                raise AssertionError("Children must be a node type")

    if isinstance(node, Element):
        for prop in node.attributes:
            if not isinstance(node[prop], (bool, str)):
                raise AssertionError(
                    "Element 'attributes' must be of type 'bool' or 'str'"
                )

    if isinstance(node, Literal) and not isinstance(node.content, str):
        raise AssertionError("Literal 'content' must be of type 'str'")

    return True


def generated(node: Node) -> bool:
    """Check if a node has been generated.

    A node is considered generated if it does not have a position.

    Args:
        node (Node): Node whose `position` is inspected.

    Returns:
        bool: True if `node.position` is None, otherwise False.
    """
    return node.position is None


def is_heading(node: Element) -> bool:
    """Check if an element is a heading (`h1` through `h6`).

    Args:
        node (Element): The node to inspect; its `type` and `tag` are read.

    Returns:
        bool: True only if the tag is exactly one of `h1` .. `h6`.

    Raises:
        TypeError: If `node.type` is not `"element"`.
    """

    if node.type != "element":
        raise TypeError("Node must be an element.")

    # `match` anchors only at the start of the string, so the pattern is
    # closed with `\Z`; otherwise a tag such as `h1-title` would be accepted.
    return match(r"h[1-6]\Z", node.tag) is not None


def is_css_link(node: Element) -> bool:
    """Check if an element is a `link` to a css file.

    Returns `true` if `node` is a `<link>` element with a `rel` list that
    contains `'stylesheet'` and has no `type`, an empty `type`, or `'text/css'`
    as its `type`.
    """

    if not (is_element(node, "link") and "rel" in node):
        return False

    # `str()` tolerates a boolean `rel` (attribute written without a value);
    # the character class splits on every ASCII whitespace separator, not
    # only on a literal space.
    if "stylesheet" not in split(r"[ \t\n\f\r]+", str(node["rel"])):
        return False

    # Can have a `type` of `text/css` or empty or no `type`
    return "type" not in node or node["type"] in ["text/css", ""]


def is_css_style(node: Element) -> bool:
    """Check if an element is a css `style` element.

    Returns `true` if `node` is a `<style>` element that
    has no `type`, an empty `type`, or `'text/css'` as its `type`.
    """

    if not is_element(node, "style"):
        return False
    if "type" not in node:
        return True
    return node["type"] in ["", "text/css"]


def is_javascript(node: Element) -> bool:
    """Check if an element is a javascript `script` element.

    Returns `true` if `node` is a `<script>` element that has a valid JavaScript `type`, has no
    `type` and a valid JavaScript `language`, or has neither.
    """
    if not is_element(node, "script"):
        return False

    has_type = "type" in node
    has_language = "language" in node

    # Declaring both attributes at once is never accepted.
    if has_type and has_language:
        return False
    if has_type:
        return node["type"] in ["text/ecmascript", "text/javascript"]
    if has_language:
        return node["language"] in ["ecmascript", "javascript"]
    return True


def is_element(node: Node, *conditions: str | list) -> bool:
    """Checks if the given node is a certain element.

    When providing a str it will check that the element's tag matches.
    If a list is provided it checks that one of the conditions in the list
    passes. With no conditions, any `Element` passes.
    """

    if not isinstance(node, Element):
        return False
    if not conditions:
        return True

    tag = node.tag
    for condition in conditions:
        if isinstance(condition, str):
            if tag == condition:
                return True
        elif isinstance(condition, list):
            if any(tag == nested for nested in condition):
                return True
    return False


def is_event_handler(attribute: str) -> bool:
    """Takes an attribute name and returns true if
    it starts with `on` and its length is `5` or more.
    """
    if not attribute.startswith("on"):
        return False
    return len(attribute) >= 5


def is_embedded(node: Element) -> bool:
    """Check to see if an element is an embedded element.

    Embedded Elements:

    * audio
    * canvas
    * embed
    * iframe
    * img
    * MathML math
    * object
    * picture
    * SVG svg
    * video

    Returns:
        True if embedded
    """
    embedded_tags = (
        "audio",
        "canvas",
        "embed",
        "iframe",
        "img",
        "math",
        "object",
        "picture",
        "svg",
        "video",
    )
    return is_element(node, *embedded_tags)


def is_interactive(node: Element) -> bool:
    """Check if the element is intended for user interaction.

    Conditions:

    * a (if the href attribute is present)
    * audio, video (if the controls attribute is present)
    * img (if the usemap attribute is present)
    * input (if the type attribute is not in the Hidden state)
    * button, details, embed, iframe, label, select, textarea (always)

    Returns:
        True if element is interactive
    """

    if is_element(node, "a"):
        return "href" in node

    if is_element(node, "audio", "video"):
        return "controls" in node

    if is_element(node, "img"):
        # `usemap` normally carries a `#name` reference, so any present value
        # other than an explicit `False` counts.
        return "usemap" in node and node["usemap"] is not False

    if is_element(node, "input"):
        # An `input` without a `type` is not in the Hidden state.
        return "type" not in node or str(node["type"]).lower() != "hidden"

    return is_element(
        node, "button", "details", "embed", "iframe", "label", "select", "textarea"
    )


def is_phrasing(node: Element) -> bool:
    """Check if a node is phrasing text according to
    https://html.spec.whatwg.org/#phrasing-content-2.

    Phrasing content is the text of the document, as well as elements that mark up that text at the
    intra-paragraph level. Runs of phrasing content form paragraphs.

    * area (if it is a descendant of a map element)
    * link (if it is allowed in the body)
    * meta (if the itemprop attribute is present)
    * map, mark, math, audio, b, bdi, bdo, br, button, canvas, cite, code, data, datalist, del, dfn,
     em, embed, i, iframe, img, input, ins, kbd, label, a, abbr, meter, noscript, object, output,
     picture, progress, q, ruby, s, samp, script, select, slot, small, span, strong, sub, sup, svg,
     template, textarea, time, u, var, video, wbr, text (true)

    Returns:
        True if the element is phrasing text
    """

    if Literal.is_text(node):
        return True

    if is_element(node, "area"):
        # Only the direct parent is inspected here.
        return node.parent is not None and is_element(node.parent, "map")

    if is_element(node, "meta"):
        return "itemprop" in node

    if is_element(node, "link"):
        body_ok = [
            "dns-prefetch",
            "modulepreload",
            "pingback",
            "preconnect",
            "prefetch",
            "preload",
            "prerender",
            "stylesheet",
        ]

        if "itemprop" in node:
            return True
        if "rel" not in node:
            return False

        # Split on every ASCII whitespace separator so tab- or newline-
        # separated keywords are recognised as individual tokens.
        tokens = split(r"[ \t\n\f\r]+", str(node["rel"]))
        return all(token in body_ok for token in tokens if token.strip() != "")

    phrasing_tags = (
        # NOTE(review): "node" is not an HTML tag and is missing from the
        # docstring list; kept for backward compatibility - confirm intent.
        "node",
        "map",
        "mark",
        "math",
        "audio",
        "b",
        "bdi",
        "bdo",
        "br",
        "button",
        "canvas",
        "cite",
        "code",
        "data",
        "datalist",
        "del",
        "dfn",
        "em",
        "embed",
        "i",
        "iframe",
        "img",
        "input",
        "ins",
        "kbd",
        "label",
        "a",
        "abbr",
        "meter",
        "noscript",
        "object",
        "output",
        "picture",
        "progress",
        "q",
        "ruby",
        "s",
        "samp",
        "script",
        "select",
        "slot",
        "small",
        "span",
        "strong",
        "sub",
        "sup",
        "svg",
        "template",
        "textarea",
        "time",
        "u",
        "var",
        "video",
        "wbr",
    )
    return is_element(node, *phrasing_tags)


def blank(value: Any) -> bool:
    """Takes any value type and returns whether it is blank/None.
    For strings if the value is stripped and is equal to '' then it is blank.
    Otherwise if len > 0 and is not None then not blank.
    Values without a `__len__` are treated as blank.

    Args:
        value (Any): The value to check if it is blank.

    Returns:
        bool: True if value is blank
    """

    if value is None or not hasattr(value, "__len__"):
        return True

    candidate = value.strip() if isinstance(value, str) else value
    return not len(candidate)
def validate(node: Node) -> bool:
    """Validate a node based on attributes and type.

    Args:
        node (Node): The node to validate.

    Returns:
        bool: Always True when no check fails.

    Raises:
        AssertionError: If a `Parent` yields a child that is not a `Node`,
            if an `Element` attribute value is neither `bool` nor `str`, or
            if a `Literal` has non-`str` content.
    """

    if isinstance(node, Parent):
        for child in node:
            if not isinstance(child, Node):
                raise AssertionError("Children must be a node type")

    if isinstance(node, Element):
        for prop in node.attributes:
            if not isinstance(node[prop], (bool, str)):
                raise AssertionError(
                    "Element 'attributes' must be of type 'bool' or 'str'"
                )

    if isinstance(node, Literal) and not isinstance(node.content, str):
        raise AssertionError("Literal 'content' must be of type 'str'")

    return True
Validate a node based on attributes and type.
def generated(node: Node) -> bool:
    """Check if a node has been generated.

    A node is considered generated if it does not have a position.

    Args:
        node (Node): Node whose `position` is inspected.

    Returns:
        bool: True if `node.position` is None, otherwise False.
    """
    position = node.position
    return position is None
Checks if a node has been generated. A node is considered generated if it does not have a position.
Args
- node (Node): Node to check for position with.
Returns
bool: True if the node has no position (it is generated), otherwise False.
def is_heading(node: Element) -> bool:
    """Check if an element is a heading (`h1` through `h6`).

    Args:
        node (Element): The node to inspect; its `type` and `tag` are read.

    Returns:
        bool: True only if the tag is exactly one of `h1` .. `h6`.

    Raises:
        TypeError: If `node.type` is not `"element"`.
    """

    if node.type != "element":
        raise TypeError("Node must be an element.")

    # `match` anchors only at the start of the string, so the pattern is
    # closed with `\Z`; otherwise a tag such as `h1-title` would be accepted.
    return match(r"h[1-6]\Z", node.tag) is not None
Check if an element is a heading.
def is_css_link(node: Element) -> bool:
    """Check if an element is a `link` to a css file.

    Returns `true` if `node` is a `<link>` element with a `rel` list that
    contains `'stylesheet'` and has no `type`, an empty `type`, or `'text/css'`
    as its `type`.
    """

    if not (is_element(node, "link") and "rel" in node):
        return False

    # `str()` tolerates a boolean `rel` (attribute written without a value);
    # the character class splits on every ASCII whitespace separator, not
    # only on a literal space.
    if "stylesheet" not in split(r"[ \t\n\f\r]+", str(node["rel"])):
        return False

    # Can have a `type` of `text/css` or empty or no `type`
    return "type" not in node or node["type"] in ["text/css", ""]
Check if an element is a link
to a css file.
Returns true
if node
is a <link>
element with a rel
list that
contains 'stylesheet'
and has no type
, an empty type
, or 'text/css'
as its type
def is_css_style(node: Element) -> bool:
    """Check if an element is a css `style` element.

    Returns `true` if `node` is a `<style>` element that
    has no `type`, an empty `type`, or `'text/css'` as its `type`.
    """

    if not is_element(node, "style"):
        return False
    if "type" not in node:
        return True
    return node["type"] in ["", "text/css"]
Check if an element is a css style
element.
Returns true
if node
is a <style>
element that
has no type
, an empty type
, or 'text/css'
as its type
.
def is_javascript(node: Element) -> bool:
    """Check if an element is a javascript `script` element.

    Returns `true` if `node` is a `<script>` element that has a valid JavaScript `type`, has no
    `type` and a valid JavaScript `language`, or has neither.
    """
    if not is_element(node, "script"):
        return False

    has_type = "type" in node
    has_language = "language" in node

    # Declaring both attributes at once is never accepted.
    if has_type and has_language:
        return False
    if has_type:
        return node["type"] in ["text/ecmascript", "text/javascript"]
    if has_language:
        return node["language"] in ["ecmascript", "javascript"]
    return True
Check if an element is a javascript script
element.
Returns true
if node
is a <script>
element that has a valid JavaScript type
, has no
type
and a valid JavaScript language
, or has neither.
def is_element(node: Node, *conditions: str | list) -> bool:
    """Checks if the given node is a certain element.

    When providing a str it will check that the element's tag matches.
    If a list is provided it checks that one of the conditions in the list
    passes. With no conditions, any `Element` passes.
    """

    if not isinstance(node, Element):
        return False
    if not conditions:
        return True

    tag = node.tag
    for condition in conditions:
        if isinstance(condition, str):
            if tag == condition:
                return True
        elif isinstance(condition, list):
            if any(tag == nested for nested in condition):
                return True
    return False
Checks if the given node is a certain element.
When providing a str it will check that the element's tag matches. If a list is provided it checks that one of the conditions in the list passes.
def is_embedded(node: Element) -> bool:
    """Check to see if an element is an embedded element.

    Embedded Elements:

    * audio
    * canvas
    * embed
    * iframe
    * img
    * MathML math
    * object
    * picture
    * SVG svg
    * video

    Returns:
        True if embedded
    """
    embedded_tags = (
        "audio",
        "canvas",
        "embed",
        "iframe",
        "img",
        "math",
        "object",
        "picture",
        "svg",
        "video",
    )
    return is_element(node, *embedded_tags)
Check to see if an element is an embedded element.
Embedded Elements:
- audio
- canvas
- embed
- iframe
- img
- MathML math
- object
- picture
- SVG svg
- video
Returns
True if embedded
def is_interactive(node: Element) -> bool:
    """Check if the element is intended for user interaction.

    Conditions:

    * a (if the href attribute is present)
    * audio, video (if the controls attribute is present)
    * img (if the usemap attribute is present)
    * input (if the type attribute is not in the Hidden state)
    * button, details, embed, iframe, label, select, textarea (always)

    Returns:
        True if element is interactive
    """

    if is_element(node, "a"):
        return "href" in node

    if is_element(node, "audio", "video"):
        return "controls" in node

    if is_element(node, "img"):
        # `usemap` normally carries a `#name` reference, so any present value
        # other than an explicit `False` counts.
        return "usemap" in node and node["usemap"] is not False

    if is_element(node, "input"):
        # An `input` without a `type` is not in the Hidden state.
        return "type" not in node or str(node["type"]).lower() != "hidden"

    return is_element(
        node, "button", "details", "embed", "iframe", "label", "select", "textarea"
    )
Check if the element is intended for user interaction.
Conditions:
- a (if the href attribute is present)
- audio (if the controls attribute is present)
- button, details, embed, iframe, img (if the usemap attribute is present)
- input (if the type attribute is not in the Hidden state)
- label, select, textarea, video (if the controls attribute is present)
Returns
True if element is interactive
def is_phrasing(node: Element) -> bool:
    """Check if a node is phrasing text according to
    https://html.spec.whatwg.org/#phrasing-content-2.

    Phrasing content is the text of the document, as well as elements that mark up that text at the
    intra-paragraph level. Runs of phrasing content form paragraphs.

    * area (if it is a descendant of a map element)
    * link (if it is allowed in the body)
    * meta (if the itemprop attribute is present)
    * map, mark, math, audio, b, bdi, bdo, br, button, canvas, cite, code, data, datalist, del, dfn,
     em, embed, i, iframe, img, input, ins, kbd, label, a, abbr, meter, noscript, object, output,
     picture, progress, q, ruby, s, samp, script, select, slot, small, span, strong, sub, sup, svg,
     template, textarea, time, u, var, video, wbr, text (true)

    Returns:
        True if the element is phrasing text
    """

    if Literal.is_text(node):
        return True

    if is_element(node, "area"):
        # Only the direct parent is inspected here.
        return node.parent is not None and is_element(node.parent, "map")

    if is_element(node, "meta"):
        return "itemprop" in node

    if is_element(node, "link"):
        body_ok = [
            "dns-prefetch",
            "modulepreload",
            "pingback",
            "preconnect",
            "prefetch",
            "preload",
            "prerender",
            "stylesheet",
        ]

        if "itemprop" in node:
            return True
        if "rel" not in node:
            return False

        # Split on every ASCII whitespace separator so tab- or newline-
        # separated keywords are recognised as individual tokens.
        tokens = split(r"[ \t\n\f\r]+", str(node["rel"]))
        return all(token in body_ok for token in tokens if token.strip() != "")

    phrasing_tags = (
        # NOTE(review): "node" is not an HTML tag and is missing from the
        # docstring list; kept for backward compatibility - confirm intent.
        "node",
        "map",
        "mark",
        "math",
        "audio",
        "b",
        "bdi",
        "bdo",
        "br",
        "button",
        "canvas",
        "cite",
        "code",
        "data",
        "datalist",
        "del",
        "dfn",
        "em",
        "embed",
        "i",
        "iframe",
        "img",
        "input",
        "ins",
        "kbd",
        "label",
        "a",
        "abbr",
        "meter",
        "noscript",
        "object",
        "output",
        "picture",
        "progress",
        "q",
        "ruby",
        "s",
        "samp",
        "script",
        "select",
        "slot",
        "small",
        "span",
        "strong",
        "sub",
        "sup",
        "svg",
        "template",
        "textarea",
        "time",
        "u",
        "var",
        "video",
        "wbr",
    )
    return is_element(node, *phrasing_tags)
Check if a node is phrasing text according to https://html.spec.whatwg.org/#phrasing-content-2.
Phrasing content is the text of the document, as well as elements that mark up that text at the intra-paragraph level. Runs of phrasing content form paragraphs.
- area (if it is a descendant of a map element)
- link (if it is allowed in the body)
- meta (if the itemprop attribute is present)
- map, mark, math, audio, b, bdi, bdo, br, button, canvas, cite, code, data, datalist, del, dfn, em, embed, i, iframe, img, input, ins, kbd, label, a, abbr, meter, noscript, object, output, picture, progress, q, ruby, s, samp, script, select, slot, small, span, strong, sub, sup, svg, template, textarea, time, u, var, video, wbr, text (true)
Returns
True if the element is phrasing text
def is_event_handler(attribute: str) -> bool:
    """Takes an attribute name and returns true if
    it starts with `on` and its length is `5` or more.
    """
    if not attribute.startswith("on"):
        return False
    return len(attribute) >= 5
Takes an attribute name and returns true if
it starts with on
and its length is 5
or more.
def blank(value: Any) -> bool:
    """Takes any value type and returns whether it is blank/None.
    For strings if the value is stripped and is equal to '' then it is blank.
    Otherwise if len > 0 and is not None then not blank.
    Values without a `__len__` are treated as blank.

    Args:
        value (Any): The value to check if it is blank.

    Returns:
        bool: True if value is blank
    """

    if value is None or not hasattr(value, "__len__"):
        return True

    candidate = value.strip() if isinstance(value, str) else value
    return not len(candidate)
Takes any value type and returns whether it is blank/None. For strings if the value is stripped and is equal to '' then it is blank. Otherwise if len > 0 and is not None then not blank.
Args
- value (Any): The value to check if it is blank.
Returns
bool: True if value is blank