phml.utils.validate.validate

  1# pylint: disable=missing-module-docstring
  2from re import match, split, sub
  3
  4from phml.nodes import All_Nodes, Comment, Element, Root, Text
  5
  6__all__ = [
  7    "validate",
  8    "parent",
  9    "literal",
 10    "generated",
 11    "has_property",
 12    "is_heading",
 13    "is_css_link",
 14    "is_css_style",
 15    "is_javascript",
 16    "is_element",
 17    "is_event_handler",
 18]
 19
 20
 21def validate(node: All_Nodes) -> bool:
 22    """Validate a node based on attributes and type."""
 23
 24    if hasattr(node, "children"):
 25        if not hasattr(node, "type"):
 26            raise AssertionError("Node should have a type")
 27
 28        if node.type not in ["root", "element"]:
 29            raise AssertionError(
 30                "Node should have a type of 'root' or 'element' to contain the 'children' attribute"
 31            )
 32
 33        if not all(isinstance(child, All_Nodes) for child in node.children):
 34            raise AssertionError("Children must be a node type")
 35
 36    if hasattr(node, "properties"):
 37        if hasattr(node, type) and node.type != "element":
 38            raise AssertionError("Node must be of type 'element' to contain 'properties'")
 39
 40        if not all(isinstance(node.properties[prop], (int, str)) for prop in node.properties):
 41            raise AssertionError("Node 'properties' must be of type 'int' or 'str'")
 42
 43    if hasattr(node, "value") and not isinstance(node.value, str):
 44        raise AssertionError("Node 'value' must be of type 'str'")
 45
 46
 47def parent(node: Root | Element) -> bool:
 48    """Validate a parent node based on attributes and type."""
 49    if not hasattr(node, "children"):
 50        raise AssertionError("Parent nodes should have the 'children' attribute")
 51
 52    if node.type == "element" and not hasattr(node, "properties"):
 53        raise AssertionError("Parent element node shoudl have the 'properties' element.")
 54
 55
 56def literal(node: Text | Comment) -> bool:
 57    """Validate a literal node based on attributes."""
 58
 59    if hasattr(node, "value"):
 60        if not isinstance(node, str):
 61            raise AssertionError("Literal nodes 'value' type should be 'str'")
 62
 63
 64def generated(node: All_Nodes) -> bool:
 65    """Checks if a node has been generated. A node is concidered
 66    generated if it does not have a position.
 67
 68    Args:
 69        node (All_Nodes): Node to check for position with.
 70
 71    Returns:
 72        bool: Whether a node has a position or not.
 73    """
 74    return hasattr(node, "position") and node.position is not None
 75
 76
 77def is_heading(node) -> bool:
 78    """Check if an element is a heading."""
 79
 80    return node.type == "element" and match(r"h[1-6]", node.tag) is not None
 81
 82
 83def is_css_link(node) -> bool:
 84    """Check if an element is a `link` to a css file.
 85
 86    Returns `true` if `node` is a `<link>` element with a `rel` list that
 87    contains `'stylesheet'` and has no `type`, an empty `type`, or `'text/css'`
 88    as its `type`
 89    """
 90
 91    return (
 92        # Verify it is a element with a `link` tag
 93        is_element(node, "link")
 94        # Must have a rel list with stylesheet
 95        and has_property(node, "rel")
 96        and "stylesheet" in split(r" ", sub(r" +", " ", node.properties["rel"]))
 97        and (
 98            # Can have a `type` of `text/css` or empty or no `type`
 99            not has_property(node, "type")
100            or (
101                has_property(node, "type")
102                and (node.properties["type"] == "text/css" or node.properties["type"] == "")
103            )
104        )
105    )
106
107
def is_css_style(node) -> bool:
    """Check if a node is a css `<style>` element.

    The node qualifies when it is a `style` element whose `type` property
    is absent, empty, or `'text/css'`.
    """

    if not is_element(node, "style"):
        return False

    # A `<style>` without a `type` defaults to css.
    if not has_property(node, "type"):
        return True

    return node.properties["type"] in ("", "text/css")
122
123
def is_javascript(node) -> bool:
    """Check if a node is a javascript `<script>` element.

    The node qualifies when it is a `script` element and one of these holds:

    * it has a JavaScript `type` and no `language`;
    * it has a JavaScript `language` and no `type`;
    * it has neither `type` nor `language`.
    """
    if not is_element(node, "script"):
        return False

    typed = has_property(node, "type")
    has_language = has_property(node, "language")

    # Carrying both properties never counts as javascript here.
    if typed and has_language:
        return False

    if typed:
        return node.properties["type"] in ["text/ecmascript", "text/javascript"]

    if has_language:
        return node.properties["language"] in ["ecmascript", "javascript"]

    # Neither property present.
    return True
143
144
145def is_element(node, *conditions: str | list) -> bool:
146    """Checks if the given node is a certain element.
147
148    When providing an str it will check that the elements tag matches.
149    If a list is provided it checks that one of the conditions in the list
150    passes.
151    """
152
153    if node.type != "element":
154        return False
155
156    return bool(
157        node.type == "element"
158        and all(
159            bool(
160                (isinstance(condition, str) and node.tag == condition)
161                or (isinstance(condition, list) and all(node.tag == nested for nested in condition))
162            )
163            for condition in conditions
164        )
165    )
166
167
def is_event_handler(attribute: str) -> bool:
    """Tell whether an attribute name looks like an event handler.

    An attribute name qualifies when it starts with `on` and is at least
    `5` characters long.
    """
    if not attribute.startswith("on"):
        return False
    return len(attribute) >= 5
173
174
def has_property(node, attribute: str) -> bool:
    """Check whether an element node has the given key in its properties.

    Nodes whose `type` is not `'element'` never have properties, so they
    always yield `False`.
    """
    if node.type != "element":
        return False
    return attribute in node.properties
181
182
def is_embedded(node: Element) -> bool:
    """Check to see if an element is an embedded element.

    Embedded Elements:

    * audio
    * canvas
    * embed
    * iframe
    * img
    * MathML math
    * object
    * picture
    * SVG svg
    * video

    Args:
        node (Element): Node to check.

    Returns:
        bool: True if the node is an element with one of the tags above.
    """
    # The tag is tested for membership directly: the node only has to match
    # one of the embedded tag names, never all of them at once.
    return node.type == "element" and node.tag in (
        "audio",
        "canvas",
        "embed",
        "iframe",
        "img",
        "math",
        "object",
        "picture",
        "svg",
        "video",
    )
217
218
def is_interactive(node: Element) -> bool:
    """Check if the element is intended for user interaction.

    Conditions:

    * a (if the href attribute is present)
    * audio, video (if the controls attribute is present)
    * img (if the usemap attribute is present)
    * input (if the type attribute is not in the Hidden state)
    * button, details, embed, iframe, label, select, textarea (always)

    Args:
        node (Element): Node to check.

    Returns:
        bool: True if the element is interactive.
    """

    if node.type != "element":
        return False

    tag = node.tag

    if tag == "a":
        return has_property(node, "href")

    if tag in ("audio", "video"):
        return has_property(node, "controls")

    if tag == "img":
        return has_property(node, "usemap")

    if tag == "input":
        # A missing `type` is not the Hidden state, so only an explicit
        # `hidden` value excludes the input. `str()` guards against
        # non-string property values.
        return not (
            has_property(node, "type") and str(node.properties["type"]).lower() == "hidden"
        )

    # These are interactive regardless of their attributes.
    return tag in ("button", "details", "embed", "iframe", "label", "select", "textarea")
247
248
def is_phrasing(node: Element) -> bool:
    """Check if a node is phrasing content according to
    https://html.spec.whatwg.org/#phrasing-content-2.

    Phrasing content is the text of the document, as well as elements that mark up that text at the
    intra-paragraph level. Runs of phrasing content form paragraphs.

    * text nodes (always)
    * area (if its parent is a map element)
    * link (if it has `itemprop`, or every `rel` token is allowed in the body)
    * meta (if the itemprop attribute is present)
    * map, mark, math, audio, b, bdi, bdo, br, button, canvas, cite, code, data, datalist, del, dfn,
     em, embed, i, iframe, img, input, ins, kbd, label, a, abbr, meter, noscript, object, output,
     picture, progress, q, ruby, s, samp, script, select, slot, small, span, strong, sub, sup, svg,
     template, textarea, time, u, var, video, wbr (always)

    Args:
        node (Element): Node to check.

    Returns:
        bool: True if the node is phrasing content.
    """

    if isinstance(node, Text):
        return True

    # Only elements can qualify beyond this point.
    if node.type != "element":
        return False

    tag = node.tag

    if tag == "area":
        # NOTE(review): only the direct parent is inspected, not every
        # ancestor - confirm whether deeper `map` ancestors should count.
        return node.parent is not None and is_element(node.parent, "map")

    if tag == "meta":
        return has_property(node, "itemprop")

    if tag == "link":
        if has_property(node, "itemprop"):
            return True

        if not has_property(node, "rel"):
            return False

        # `split()` without arguments tolerates repeated or surrounding
        # whitespace; an empty `rel` is rejected rather than vacuously valid.
        rel_tokens = str(node.properties["rel"]).split()
        return bool(rel_tokens) and all(
            token
            in {
                "dns-prefetch",
                "modulepreload",
                "pingback",
                "preconnect",
                "prefetch",
                "preload",
                "prerender",
                "stylesheet",
            }
            for token in rel_tokens
        )

    # The node itself (not a string literal) is tested, and one matching tag
    # name is sufficient.
    return tag in {
        "map",
        "mark",
        "math",
        "audio",
        "b",
        "bdi",
        "bdo",
        "br",
        "button",
        "canvas",
        "cite",
        "code",
        "data",
        "datalist",
        "del",
        "dfn",
        "em",
        "embed",
        "i",
        "iframe",
        "img",
        "input",
        "ins",
        "kbd",
        "label",
        "a",
        "abbr",
        "meter",
        "noscript",
        "object",
        "output",
        "picture",
        "progress",
        "q",
        "ruby",
        "s",
        "samp",
        "script",
        "select",
        "slot",
        "small",
        "span",
        "strong",
        "sub",
        "sup",
        "svg",
        "template",
        "textarea",
        "time",
        "u",
        "var",
        "video",
        "wbr",
    }
def validate( node: phml.nodes.root.Root | phml.nodes.element.Element | phml.nodes.text.Text | phml.nodes.comment.Comment | phml.nodes.doctype.DocType | phml.nodes.parent.Parent | phml.nodes.node.Node | phml.nodes.literal.Literal) -> bool:
def validate(node: All_Nodes) -> bool:
    """Validate a node based on its attributes and type.

    The following rules are enforced:

    * A node with `children` must have a `type` of `'root'` or `'element'`,
      and every child must itself be a node.
    * A node with `properties` must not be typed as anything other than
      `'element'`, and every property value must be an `int` or a `str`.
    * A node with `value` must store it as a `str`.

    Args:
        node (All_Nodes): Node to validate.

    Returns:
        bool: `True` when every applicable rule holds.

    Raises:
        AssertionError: If any of the rules above is violated.
    """

    if hasattr(node, "children"):
        if not hasattr(node, "type"):
            raise AssertionError("Node should have a type")

        if node.type not in ["root", "element"]:
            raise AssertionError(
                "Node should have a type of 'root' or 'element' to contain the 'children' attribute"
            )

        if not all(isinstance(child, All_Nodes) for child in node.children):
            raise AssertionError("Children must be a node type")

    if hasattr(node, "properties"):
        # The attribute name has to be the string "type"; handing `hasattr`
        # the builtin `type` raises TypeError instead of checking anything.
        if hasattr(node, "type") and node.type != "element":
            raise AssertionError("Node must be of type 'element' to contain 'properties'")

        if not all(isinstance(node.properties[prop], (int, str)) for prop in node.properties):
            raise AssertionError("Node 'properties' must be of type 'int' or 'str'")

    if hasattr(node, "value") and not isinstance(node.value, str):
        raise AssertionError("Node 'value' must be of type 'str'")

    # Every failure path raises, so reaching this point means the node is valid.
    return True

Validate a node based on attributes and type.

def parent(node: phml.nodes.root.Root | phml.nodes.element.Element) -> bool:
def parent(node: Root | Element) -> bool:
    """Validate a parent node based on its attributes and type.

    Args:
        node (Root | Element): Node expected to be able to hold children.

    Returns:
        bool: `True` when the node has `children` and, if it is an element,
        also has `properties`.

    Raises:
        AssertionError: If `children` is missing, or if the node is of type
            `'element'` and `properties` is missing.
    """
    if not hasattr(node, "children"):
        raise AssertionError("Parent nodes should have the 'children' attribute")

    if node.type == "element" and not hasattr(node, "properties"):
        raise AssertionError("Parent element node should have the 'properties' attribute.")

    # Every failure path raises, so reaching this point means the node is valid.
    return True

Validate a parent node based on attributes and type.

def literal(node: phml.nodes.text.Text | phml.nodes.comment.Comment) -> bool:
def literal(node: Text | Comment) -> bool:
    """Validate a literal node based on its attributes.

    Args:
        node (Text | Comment): Literal node to validate.

    Returns:
        bool: `True` when the node has no `value` or its `value` is a `str`.

    Raises:
        AssertionError: If the node has a `value` that is not a `str`.
    """

    # The check applies to the stored value, not to the node object itself;
    # testing the node would reject every literal that carries a value.
    if hasattr(node, "value") and not isinstance(node.value, str):
        raise AssertionError("Literal nodes 'value' type should be 'str'")

    return True

Validate a literal node based on attributes.

def generated( node: phml.nodes.root.Root | phml.nodes.element.Element | phml.nodes.text.Text | phml.nodes.comment.Comment | phml.nodes.doctype.DocType | phml.nodes.parent.Parent | phml.nodes.node.Node | phml.nodes.literal.Literal) -> bool:
def generated(node: All_Nodes) -> bool:
    """Check whether a node has been generated.

    A node is considered generated when it carries no positional
    information: it either lacks a `position` attribute or that attribute
    is `None`.

    Args:
        node (All_Nodes): Node to check for a position.

    Returns:
        bool: `True` if the node has no position (it was generated),
        `False` if it has one.
    """
    # "Generated" means *no* position, so a missing attribute and an explicit
    # `None` both count as generated.
    return getattr(node, "position", None) is None

Checks if a node has been generated. A node is considered generated if it does not have a position.

Args
  • node (All_Nodes): Node to check for position with.
Returns

bool: Whether a node has a position or not.

def has_property(node, attribute: str) -> bool:
def has_property(node, attribute: str) -> bool:
    """Check whether an element node has the given key in its properties.

    Nodes whose `type` is not `'element'` never have properties, so they
    always yield `False`.
    """
    if node.type != "element":
        return False
    return attribute in node.properties

Check to see if an element has a certain property in properties.

def is_heading(node) -> bool:
def is_heading(node) -> bool:
    """Check if a node is a heading element (`h1` through `h6`).

    Args:
        node: Node to check; it must expose `type`, and `tag` when it is an
            element.

    Returns:
        bool: `True` only for element nodes whose tag is exactly one of
        `h1` ... `h6`.
    """

    # `match` only anchors at the start, so the pattern is closed with `\Z`
    # to keep tags that merely begin with a heading name from passing.
    return node.type == "element" and match(r"h[1-6]\Z", node.tag) is not None

Check if an element is a heading.

def is_css_style(node) -> bool:
def is_css_style(node) -> bool:
    """Check if a node is a css `<style>` element.

    The node qualifies when it is a `style` element whose `type` property
    is absent, empty, or `'text/css'`.
    """

    if not is_element(node, "style"):
        return False

    # A `<style>` without a `type` defaults to css.
    if not has_property(node, "type"):
        return True

    return node.properties["type"] in ("", "text/css")

Check if an element is a css style element.

Returns true if node is a <style> element that has no type, an empty type, or 'text/css' as its type.

def is_javascript(node) -> bool:
def is_javascript(node) -> bool:
    """Check if a node is a javascript `<script>` element.

    The node qualifies when it is a `script` element and one of these holds:

    * it has a JavaScript `type` and no `language`;
    * it has a JavaScript `language` and no `type`;
    * it has neither `type` nor `language`.
    """
    if not is_element(node, "script"):
        return False

    typed = has_property(node, "type")
    has_language = has_property(node, "language")

    # Carrying both properties never counts as javascript here.
    if typed and has_language:
        return False

    if typed:
        return node.properties["type"] in ["text/ecmascript", "text/javascript"]

    if has_language:
        return node.properties["language"] in ["ecmascript", "javascript"]

    # Neither property present.
    return True

Check if an element is a javascript script element.

Returns true if node is a <script> element that has a valid JavaScript type, has no type and a valid JavaScript language, or has neither.

def is_element(node, *conditions: str | list) -> bool:
146def is_element(node, *conditions: str | list) -> bool:
147    """Checks if the given node is a certain element.
148
149    When providing an str it will check that the elements tag matches.
150    If a list is provided it checks that one of the conditions in the list
151    passes.
152    """
153
154    if node.type != "element":
155        return False
156
157    return bool(
158        node.type == "element"
159        and all(
160            bool(
161                (isinstance(condition, str) and node.tag == condition)
162                or (isinstance(condition, list) and all(node.tag == nested for nested in condition))
163            )
164            for condition in conditions
165        )
166    )

Checks if the given node is a certain element.

When given a str, it checks that the element's tag matches. If a list is provided, it checks that one of the conditions in the list passes.

def is_event_handler(attribute: str) -> bool:
def is_event_handler(attribute: str) -> bool:
    """Tell whether an attribute name looks like an event handler.

    An attribute name qualifies when it starts with `on` and is at least
    `5` characters long.
    """
    if not attribute.startswith("on"):
        return False
    return len(attribute) >= 5

Takes an attribute name and returns true if it starts with on and its length is 5 or more.