Coverage for phml\utilities\transform\sanitize\schema.py: 100%
16 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-04-12 14:26 -0500
« prev ^ index » next coverage.py v6.5.0, created at 2023-04-12 14:26 -0500
1"""Defines the schema on how to sanitize the phml ast."""
2from __future__ import annotations
4from dataclasses import dataclass, field
def _extend_dict_dict_(
    origin: dict[str, dict], new: dict[str, dict]
) -> dict[str, dict]:
    """Merge the nested dicts of `new` into `origin` and return `origin`.

    For every key in `new`, the entries of its dict are layered on top of
    whatever `origin` already holds for that key; on conflicting inner keys
    the value from `new` wins.

    Args:
        origin: Mapping to merge into. Its top-level keys are rebound in place.
        new: Mapping whose nested dicts are merged into `origin`.

    Returns:
        The same `origin` object that was passed in.
    """
    for key, value in new.items():
        # Rebind to a freshly built dict rather than calling `.update()` on the
        # existing one (or storing `value` itself): the nested dicts may be
        # shared with another mapping, e.g. when `origin` is a shallow copy,
        # and must not be modified behind that owner's back.
        origin[key] = {**origin.get(key, {}), **value}

    return origin
def _extend_dict_list_(
    origin: dict[str, list], new: dict[str, list]
) -> dict[str, list]:
    """Merge the lists of `new` into `origin` and return `origin`.

    For every key in `new`, items that are not already present are appended,
    in order, after the items `origin` holds for that key. Duplicate items
    are added only once.

    Args:
        origin: Mapping to merge into. Its top-level keys are rebound in place.
        new: Mapping whose lists are merged into `origin`.

    Returns:
        The same `origin` object that was passed in.
    """
    for key, value in new.items():
        # Work on a fresh list instead of extending the existing one (or
        # storing `value` itself): the lists may be shared with another
        # mapping, e.g. when `origin` is a shallow copy, and must not be
        # modified behind that owner's back.
        merged = list(origin.get(key, ()))
        for item in value:
            # Items may be unhashable, so membership is checked on the list.
            if item not in merged:
                merged.append(item)
        origin[key] = merged

    return origin
@dataclass
class Schema:
    """Dataclass of information on how to sanitize a phml tree.

    `strip (list[str])`: The elements to strip from the tree.
    `ancestors (dict[str, list])`: Collection of element name and the list of valid
    parent elements.
    `protocols (dict[str, list])`: Collection of property name (for example `href`) and
    allowed protocol value list.
    `tag_names (list[str])`: List of allowed tag names.
    `attributes (dict[str, list[str | tuple[str | bool, ...]]])`: Collection of element name
    and allowed property names. An entry is either a bare property name or a tuple that
    starts with the property name (presumably followed by its allowed values; confirm
    against the sanitizer). The `*` key presumably applies to every element.
    `required (dict[str, dict[str, str | bool]])`: Collection of element names and their required
    properties and required property values.
    """

    strip: list[str] = field(default_factory=lambda: ["script"])
    ancestors: dict[str, list] = field(
        default_factory=lambda: {
            "tbody": ["table"],
            "tfoot": ["table"],
            "thead": ["table"],
            "td": ["table"],
            "th": ["table"],
            "tr": ["table"],
        },
    )
    protocols: dict[str, list] = field(
        default_factory=lambda: {
            "href": ["http", "https", "mailto", "xmpp", "irc", "ircs"],
            "cite": ["http", "https"],
            "src": ["http", "https"],
            "longDesc": ["http", "https"],
        },
    )
    tag_names: list[str] = field(
        default_factory=lambda: [
            "h1",
            "h2",
            "h3",
            "h4",
            "h5",
            "h6",
            "br",
            "b",
            "i",
            "strong",
            "em",
            "a",
            "pre",
            "code",
            "img",
            "tt",
            "div",
            "ins",
            "del",
            "sup",
            "sub",
            "p",
            "ol",
            "ul",
            "table",
            "thead",
            "tbody",
            "tfoot",
            "blockquote",
            "dl",
            "dt",
            "dd",
            "kbd",
            "q",
            "samp",
            "var",
            "hr",
            "ruby",
            "rt",
            "rp",
            "li",
            "tr",
            "td",
            "th",
            "s",
            "strike",
            "summary",
            "details",
            "caption",
            "figure",
            "figcaption",
            "abbr",
            "bdo",
            "cite",
            "dfn",
            "mark",
            "small",
            "span",
            "time",
            "wbr",
            "input",
        ],
    )
    attributes: dict[str, list[str | tuple[str | bool, ...]]] = field(
        default_factory=lambda: {
            "a": ["href"],
            "article": ["class"],
            "img": ["src", "longDesc", "loading"],
            "input": [("type", "checkbox"), ("disabled", True)],
            "li": [("class", "task-list-item")],
            "div": ["itemScope", "itemType"],
            "blockquote": ["cite"],
            "del": ["cite"],
            "ins": ["cite"],
            "q": ["cite"],
            "*": [
                "abbr",
                "accept",
                "acceptCharset",
                "accessKey",
                "action",
                "align",
                "alt",
                "ariaDescribedBy",
                "ariaHidden",
                "ariaLabel",
                "ariaLabelledBy",
                "axis",
                "border",
                "cellPadding",
                "cellSpacing",
                "char",
                "charOff",
                "charSet",
                "checked",
                "clear",
                "cols",
                "colSpan",
                "color",
                "compact",
                "coords",
                "dateTime",
                "dir",
                "disabled",
                "encType",
                "htmlFor",
                "frame",
                "headers",
                "height",
                "hrefLang",
                "hSpace",
                "isMap",
                "id",
                "label",
                "lang",
                "maxLength",
                "media",
                "method",
                "multiple",
                "name",
                "noHref",
                "noShade",
                "noWrap",
                "open",
                "prompt",
                "readOnly",
                "rel",
                "rev",
                "rows",
                "rowSpan",
                "rules",
                "scope",
                "selected",
                "shape",
                "size",
                "span",
                "start",
                "summary",
                "tabIndex",
                "target",
                "title",
                "type",
                "useMap",
                "vAlign",
                "value",
                "vSpace",
                "width",
                "itemProp",
            ],
        },
    )
    required: dict[str, dict[str, str | bool]] = field(
        default_factory=lambda: {
            "input": {
                "type": "checkbox",
                "disabled": True,
            },
        },
    )

    def extend(
        self,
        strip: list[str] | None = None,
        ancestors: dict[str, list[str]] | None = None,
        protocols: dict[str, list[str]] | None = None,
        tag_names: list[str] | None = None,
        attributes: dict[str, list[str | tuple[str | bool, ...]]] | None = None,
        required: dict[str, dict[str, str | bool]] | None = None,
    ) -> Schema:
        """Build a new schema from this one plus the given additions.

        Every argument is optional; an omitted argument leaves that part of the
        schema as it is on this instance. This instance is left unmodified.

        Args:
            `strip (list[str])`: The elements to strip from the tree.
            `ancestors (dict[str, list[str]])`: Key is an element tag and the value is a list of
            valid parent elements.
            `protocols (dict[str, list[str]])`: Collection of property names to list of valid
            protocols (prefixes).
            `tag_names (list[str])`: List of allowed tag names.
            `attributes (dict[str, list[str | tuple[str | bool, ...]]])`: Collection of element
            name and allowed property names.
            `required (dict[str, dict[str, str | bool]])`: Collection of element names and their
            required properties and required property values.

        Returns:
            A new `Schema` holding the combined values.
        """

        # The merge helpers write into the mapping they are given, so hand them
        # copies whose nested lists/dicts are copied as well; a plain
        # `{**mapping}` would still share the inner containers with `self`.
        return Schema(
            # `dict.fromkeys` drops duplicates like a set would, but keeps the
            # declared order so the result is deterministic.
            strip=list(dict.fromkeys([*self.strip, *(strip or [])])),
            ancestors=_extend_dict_list_(
                {key: list(value) for key, value in self.ancestors.items()},
                ancestors or {},
            ),
            protocols=_extend_dict_list_(
                {key: list(value) for key, value in self.protocols.items()},
                protocols or {},
            ),
            attributes=_extend_dict_list_(
                {key: list(value) for key, value in self.attributes.items()},
                attributes or {},
            ),
            tag_names=list(dict.fromkeys([*self.tag_names, *(tag_names or [])])),
            required=_extend_dict_dict_(
                {key: dict(value) for key, value in self.required.items()},
                required or {},
            ),
        )