Coverage for phml\utilities\transform\sanitize\clean.py: 100%
63 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-04-12 14:26 -0500
« prev ^ index » next coverage.py v6.5.0, created at 2023-04-12 14:26 -0500
1from __future__ import annotations
3from re import match
5from phml.nodes import Element, Node, Parent
7from .schema import Schema
def recurse_check_tag(node: Parent, schema: Schema):
    """Remove child elements whose tag is not accepted by the schema.

    Every child that is an ``Element`` is tested with ``is_element`` against
    ``schema.tag_names``. Elements that fail the test are removed from
    ``node``; any remaining child that is a ``Parent`` is processed
    recursively.

    Args:
        node (Parent): Node whose children are checked. Edited in place.
        schema (Schema): Schema providing ``tag_names``.
    """
    from phml.utilities import is_element

    # Iterate over a snapshot because children are removed while walking.
    for candidate in tuple(node):
        rejected = isinstance(candidate, Element) and not is_element(
            candidate, schema.tag_names
        )
        if rejected:
            node.remove(candidate)
            continue
        if isinstance(candidate, Parent):
            recurse_check_tag(candidate, schema)
def recurse_check_ancestor(node: Parent, schema: Schema):
    """Remove elements that are not inside one of their required parents.

    For each child element whose tag has an entry in ``schema.ancestors``,
    the child is removed from ``node`` unless its direct parent is an
    ``Element`` whose tag is listed in that entry. Elements that are kept
    are processed recursively; non-element children are ignored.

    Args:
        node (Parent): Node whose children are checked. Edited in place.
        schema (Schema): Schema providing the ``ancestors`` mapping.
    """
    # Iterate over a snapshot because children are removed while walking.
    for candidate in tuple(node):
        if not isinstance(candidate, Element):
            continue

        if candidate.tag in schema.ancestors:
            holder = candidate.parent
            properly_nested = (
                isinstance(holder, Element)
                and holder.tag in schema.ancestors[candidate.tag]
            )
            if not properly_nested:
                node.remove(candidate)
                continue

        recurse_check_ancestor(candidate, schema)
def build_remove_attr_list(
    properties: dict,
    attributes: dict[str, tuple[str | bool, ...]],
    valid_attributes: list,
    schema: Schema,
):
    """Build the list of attributes to remove from a dict of attributes.

    An attribute is scheduled for removal when any of the following holds:

    * its name is not in ``valid_attributes``;
    * its value is a string, the name has an entry in ``schema.protocols``
      and ``check_protocols`` rejects the value;
    * the name has a non-empty tuple of allowed values in ``attributes`` and
      the value is not one of them.

    Args:
        properties (dict): Attribute names mapped to their current values.
        attributes (dict): Attribute names mapped to a tuple of the values
            they may hold. An empty tuple places no restriction on the value.
        valid_attributes (list): Names of all attributes that may be kept.
        schema (Schema): Schema providing the ``protocols`` mapping.

    Returns:
        list: Names of the attributes that should be removed, in the
        iteration order of ``properties``.
    """
    result = []
    for attribute, value in properties.items():
        if attribute not in valid_attributes:
            result.append(attribute)
            continue

        # The protocol check applies whether or not the attribute also has a
        # restricted set of values.
        if (
            isinstance(value, str)
            and attribute in schema.protocols
            and not check_protocols(value, schema.protocols[attribute], schema)
        ):
            result.append(attribute)
            continue

        # `attributes` holds a tuple of allowed values, so the value must be
        # tested for membership. Comparing it to the tuple with `!=` is always
        # true and would drop every restricted attribute.
        allowed_values = attributes.get(attribute, ())
        if allowed_values and value not in allowed_values:
            result.append(attribute)
    return result
def recurse_check_attributes(node: Node, schema: Schema):
    """Strip attributes the schema does not allow from a node and its subtree.

    For an ``Element`` the allowed attribute specs are the entries of
    ``schema.attributes`` for the element's tag (when present) followed by
    the entries stored under ``"*"``. A spec is either a plain attribute
    name or a tuple whose first item is the name and whose remaining items
    are passed on as that attribute's allowed values. The names returned by
    ``build_remove_attr_list`` are popped from the element. Children of any
    ``Parent`` are then processed recursively.

    Args:
        node (Node): Node to clean. Edited in place.
        schema (Schema): Schema providing the ``attributes`` mapping.
    """
    if isinstance(node, Element):
        wildcard_specs = schema.attributes.get("*", [])
        if node.tag in schema.attributes:
            specs = schema.attributes[node.tag] + wildcard_specs
        else:
            specs = wildcard_specs

        # Tuple specs carry the values an attribute is restricted to.
        restricted = {
            str(spec[0]): spec[1:] for spec in specs if isinstance(spec, tuple)
        }
        # Every spec, restricted or not, contributes an allowed name.
        permitted = [spec if isinstance(spec, str) else spec[0] for spec in specs]

        doomed = build_remove_attr_list(node.attributes, restricted, permitted, schema)
        for name in doomed:
            node.pop(name, None)

    if isinstance(node, Parent):
        for descendant in node:
            recurse_check_attributes(descendant, schema)
def recurse_check_required(node: Parent, schema: Schema):
    """Apply the schema's required attribute values to elements in the tree.

    For every child element whose tag has an entry in ``schema.required``,
    each required attribute is enforced as follows:

    * a missing attribute is set to the required value;
    * a present attribute with a boolean requirement is overwritten with the
      lower-cased string form of that boolean (``"true"`` / ``"false"``);
    * a present attribute with a string requirement is overwritten when its
      current value differs.

    Every child element is then processed recursively, including elements
    that have required attributes of their own, so nested elements are not
    skipped.

    Args:
        node (Parent): Node whose children are checked. Edited in place.
        schema (Schema): Schema providing the ``required`` mapping.
    """
    for child in node:
        if not isinstance(child, Element):
            continue

        if child.tag in schema.required:
            for attr, value in schema.required[child.tag].items():
                if attr not in child.attributes:
                    child[attr] = value
                elif isinstance(value, bool):
                    child[attr] = str(value).lower()
                elif isinstance(value, str) and child[attr] != value:
                    child[attr] = value

        # Recurse unconditionally: descendants of an element that has
        # required attributes must be checked too.
        recurse_check_required(child, schema)
def check_protocols(value: str, protocols: list[str], schema: Schema):
    """Return whether ``value`` starts with one of the allowed protocols.

    A value is accepted only when it begins with ``"<protocol>:"`` for some
    entry of ``protocols``. The comparison is a literal, case-sensitive
    prefix test, so protocol names are never interpreted as regular
    expression syntax and a protocol name without the trailing colon does
    not match.

    Args:
        value (str): Attribute value to check.
        protocols (list[str]): Allowed protocol names without the colon.
        schema (Schema): Unused; kept so existing callers keep working.

    Returns:
        bool: ``True`` if the value uses an allowed protocol, else ``False``.
        An empty ``protocols`` list accepts nothing.
    """
    # A joined regex such as "http|https:.*" only requires the colon after
    # the last alternative, so a plain prefix test is both stricter and safer.
    prefixes = tuple(f"{protocol}:" for protocol in protocols)
    return value.startswith(prefixes)
def recurse_strip(node, schema: Schema):
    """Remove child elements that match the schema's strip list.

    Every child that is an ``Element`` is tested with ``is_element`` against
    ``schema.strip``. Matching elements are removed from ``node``; any
    remaining child that is a ``Parent`` is processed recursively.

    Args:
        node: Node whose children are checked. Edited in place.
        schema (Schema): Schema providing the ``strip`` collection.
    """
    from phml.utilities import is_element

    # Iterate over a snapshot because children are removed while walking.
    for candidate in tuple(node):
        stripped = isinstance(candidate, Element) and is_element(
            candidate, schema.strip
        )
        if stripped:
            node.remove(candidate)
            continue
        if isinstance(candidate, Parent):
            recurse_strip(candidate, schema)
def sanatize(tree: Parent, schema: Schema | None = None):
    """Sanitize elements and attributes in the phml tree.

    Should be used when working with data from an unknown source. It should
    be applied to an AST that has already been compiled to html so that no
    unknown values are left unchecked.

    By default the sanitization schema uses the github schema and follows the
    hast sanitize utility.

    * [github schema](https://github.com/syntax-tree/hast-util-sanitize/blob/main/lib/schema.js)
    * [hast sanatize](https://github.com/syntax-tree/hast-util-sanitize)

    The passes run in this order: strip-listed tags are removed, tags not
    allowed by the schema are removed, strip-listed elements are removed
    again, ancestor rules are enforced, attributes are filtered and finally
    required attribute values are applied.

    Note:
        This utility will edit the tree in place.

    Args:
        tree (Parent): The root of the tree that will be sanitized.
        schema (Schema, optional): User defined schema. When omitted or
            ``None`` a new default ``Schema()`` is created for the call.
    """

    from phml.utilities import remove_nodes  # pylint: disable=import-outside-toplevel

    # Build the default per call: a `Schema()` default argument would be
    # evaluated once at definition time and shared by every call.
    if schema is None:
        schema = Schema()

    for strip in schema.strip:
        remove_nodes(tree, ["element", {"tag": strip}])

    recurse_check_tag(tree, schema)
    recurse_strip(tree, schema)
    recurse_check_ancestor(tree, schema)
    recurse_check_attributes(tree, schema)
    recurse_check_required(tree, schema)