Coverage for phml\utils\locate\select.py: 72%
217 statements
« prev ^ index » next coverage.py v6.5.0, created at 2022-11-30 09:38 -0600
« prev ^ index » next coverage.py v6.5.0, created at 2022-11-30 09:38 -0600
1"""utils.select
3A collection of utilities around querying for specific
4types of data.
5"""
7from typing import Callable
9from phml.nodes import AST, Element, Root
10from phml.utils.travel import visit_children, walk
12__all__ = ["query", "queryAll", "matches"]
def query(tree: AST | Root | Element, specifier: str) -> Element | None:
    """Find the first element matching `specifier`, like JavaScript's `querySelector`.

    `#` indicates an id and `.` indicates a class. If they are used alone they
    match any tag. Any tag can be used by itself or with `#` and/or `.`. You may
    use any number of class specifiers, but only one id specifier per tag name.
    Complex specifiers are allowed: space separated parts express nesting, and
    the `*`, `>`, `+` and `~` tokens are handled as "any element", "child",
    "next sibling" and "following siblings" rules respectively.

    Examples:
    * `.some-example` matches the first element with the class `some-example`
    * `#some-example` matches the first element with the id `some-example`
    * `li` matches the first `li` element
    * `li.red` matches the first `li` with the class `red`
    * `li#red` matches the first `li` with the id `red`
    * `input[type="checkbox"]` matches the first `input` with the attribute `type="checkbox"`
    * `div.form-control input[type="checkbox"]` matches the first `input` with the
    attribute `type="checkbox"` found under a `div` with the class `form-control`.

    Args:
        tree (AST | Root | Element): Where to search. An `AST` is unwrapped to its `tree`.
        specifier (str): The specifier describing the element to find.

    Return:
        Element | None: The first element matching the specifier or None if no element was
        found.
    """
    if isinstance(tree, AST):
        tree = tree.tree

    rules = __parse_specifiers(specifier)

    def all_nodes(node: Element, rules: list, include_self: bool = True):
        """Check every element yielded by `walk(node)`; return the first match."""
        for candidate in walk(node):
            if candidate.type == "element" and (include_self or candidate != node):
                found = branch(candidate, rules)
                if found is not None:
                    return found
        return None

    def all_children(node: Element, rules: list):
        """Check every element yielded by `visit_children(node)`; return the first match."""
        for child in visit_children(node):
            if child.type == "element":
                found = branch(child, rules)
                if found is not None:
                    return found
        return None

    def first_sibling(node: Element, rules: list):
        """Check only the node directly after `node` in its parent's children."""
        if node.parent is None:
            return None

        siblings = node.parent.children
        following = siblings.index(node) + 1
        if following < len(siblings) and siblings[following].type == "element":
            return branch(siblings[following], rules)
        return None

    def all_siblings(node: Element, rules: list):
        """Check every element after `node` in its parent's children; return the first match."""
        if node.parent is None:
            return None

        siblings = node.parent.children
        for sibling in siblings[siblings.index(node) + 1 :]:
            if sibling.type == "element":
                found = branch(sibling, rules)
                if found is not None:
                    return found
        return None

    def branch(node: Element, rules: list):
        """Apply the first rule to `node` and recurse with the remaining rules.

        Returns the matched element once the rules are exhausted, otherwise None.
        """
        if len(rules) == 0:
            return node

        head = rules[0]
        if isinstance(head, dict):
            if not is_equal(head, node):
                return None
            if len(rules) == 1:
                return node

            upcoming = rules[1]
            if isinstance(upcoming, dict):
                # Two element rules in a row: keep searching from this node.
                return all_nodes(node, rules[1:])
            if upcoming == "*":
                # Explicit `*` after an element rule: same search, but never
                # re-test the node that just matched.
                return all_nodes(node, rules[2:], False)
            return branch(node, rules[1:])

        if head == "*":
            return all_nodes(node, rules[1:])
        if head == ">":
            return all_children(node, rules[1:])
        if head == "+":
            return first_sibling(node, rules[1:])
        if head == "~":
            return all_siblings(node, rules[1:])
        return None

    return all_nodes(tree, rules)
def queryAll(tree: AST | Root | Element, specifier: str) -> list[Element]:
    """Find every element matching `specifier`, like JavaScript's `querySelectorAll`.

    `#` indicates an id and `.` indicates a class. If they are used alone they
    match any tag. Any tag can be used by itself or with `#` and/or `.`. You may
    use any number of class specifiers, but only one id specifier per tag name.
    Complex specifiers are allowed: space separated parts express nesting, and
    the `*`, `>`, `+` and `~` tokens are handled as "any element", "child",
    "next sibling" and "following siblings" rules respectively.

    Examples:
    * `.some-example` matches all elements with the class `some-example`
    * `#some-example` matches all elements with the id `some-example`
    * `li` matches all `li` elements
    * `li.red` matches all `li` elements with the class `red`
    * `li#red` matches all `li` elements with the id `red`
    * `input[type="checkbox"]` matches all `input` elements with the attribute `type="checkbox"`
    * `div.form-control input[type="checkbox"]` matches all `input` elements with the
    attribute `type="checkbox"` found under a `div` with the class `form-control`.

    Args:
        tree (AST | Root | Element): Where to search. An `AST` is unwrapped to its `tree`.
        specifier (str): The specifier describing the elements to find.

    Return:
        list[Element]: All elements matching the specifier, each listed once in the
        order first found, or an empty list if no elements were found.
    """
    if isinstance(tree, AST):
        tree = tree.tree

    rules = __parse_specifiers(specifier)

    def all_nodes(node: Element, rules: list, include_self: bool = True):
        """Collect matches from every element yielded by `walk(node)`."""
        collected = []
        for candidate in walk(node):
            if candidate.type == "element" and (include_self or candidate != node):
                collected.extend(branch(candidate, rules))
        return collected

    def all_children(node: Element, rules: list):
        """Collect matches from every element yielded by `visit_children(node)`."""
        collected = []
        for child in visit_children(node):
            if child.type == "element":
                collected.extend(branch(child, rules))
        return collected

    def first_sibling(node: Element, rules: list):
        """Collect matches from only the node directly after `node` in its parent."""
        if node.parent is None:
            return []

        siblings = node.parent.children
        following = siblings.index(node) + 1
        if following < len(siblings) and siblings[following].type == "element":
            # `branch` always returns a list, so a non-matching sibling yields
            # an empty result instead of failing while unpacking None.
            return branch(siblings[following], rules)
        return []

    def all_siblings(node: Element, rules: list):
        """Collect matches from every element after `node` in its parent's children."""
        if node.parent is None:
            return []

        collected = []
        siblings = node.parent.children
        for sibling in siblings[siblings.index(node) + 1 :]:
            if sibling.type == "element":
                collected.extend(branch(sibling, rules))
        return collected

    def branch(node: Element, rules: list):
        """Apply the first rule to `node` and recurse with the remaining rules.

        Always returns a list: the matched elements, or an empty list when the
        node fails the rule or the rule token is not recognized.
        """
        if len(rules) == 0:
            return [node]

        head = rules[0]
        if isinstance(head, dict):
            if not is_equal(head, node):
                return []
            if len(rules) == 1:
                return [node]

            upcoming = rules[1]
            if isinstance(upcoming, dict):
                # Two element rules in a row: keep searching from this node.
                return all_nodes(node, rules[1:])
            if upcoming == "*":
                # Explicit `*` after an element rule: same search, but never
                # re-test the node that just matched.
                return all_nodes(node, rules[2:], False)
            return branch(node, rules[1:])

        if head == "*":
            return all_nodes(node, rules[1:])
        if head == ">":
            return all_children(node, rules[1:])
        if head == "+":
            return first_sibling(node, rules[1:])
        if head == "~":
            return all_siblings(node, rules[1:])
        return []

    # The same element can be collected more than once when several elements
    # matching an earlier rule each lead to it, so keep only the first
    # occurrence. Identity is used so distinct but equal-looking nodes survive.
    unique = []
    seen = set()
    for element in all_nodes(tree, rules):
        if id(element) not in seen:
            seen.add(id(element))
            unique.append(element)
    return unique
def matches(node: Element, specifier: str) -> bool:
    """Check a single element against a specifier, like JavaScript's `matches`.

    `#` indicates an id and `.` indicates a class. If they are used alone they
    match any tag. Any tag can be used by itself or with `#` and/or `.`. You may
    use any number of class specifiers, but only one id specifier per tag name.
    Complex specifiers are not supported. Everything in the specifier must
    relate to one element/tag.

    Examples:
    * `.some-example` matches an element with the class `some-example`
    * `#some-example` matches an element with the id `some-example`
    * `li` matches an `li` element
    * `li.red` matches an `li` with the class `red`
    * `li#red` matches an `li` with the id `red`
    * `input[type="checkbox"]` matches an `input` with the attribute `type="checkbox"`

    Args:
        node (Element): The element to test.
        specifier (str): A specifier describing exactly one element.

    Returns:
        bool: Whether the element satisfies the specifier.

    Raises:
        Exception: If the specifier parses to more than one rule, to no rule at
            all, or to something other than a single element rule.
    """
    rules = __parse_specifiers(specifier)

    if len(rules) > 1:
        raise Exception(f"Complex specifier detected and is not allowed.\n{specifier}")

    # An empty or unparseable specifier yields no rules at all; report it with
    # the same descriptive error instead of failing on `rules[0]`.
    if len(rules) == 0 or not isinstance(rules[0], dict):
        raise Exception(
            "Specifier must only include tag name, classes, id, and or attribute specfiers.\n"
            "Example: `li.red#sample[class^='form-'][title~='sample']`"
        )

    return is_equal(rules[0], node)
def is_equal(rule: dict, node: Element) -> bool:
    """Check whether a node satisfies every part of a rule.

    A rule is a dictionary with the keys `tag`, `id`, `classList` and
    `attributes`; each part that is set must hold for the node:

    * If `tag` is not `*`, the node's `tag` must equal it.
    * If `id` is not None, the node's `id` property must equal it. A node
      without an `id` property never matches.
    * Each class in `classList` must be present in the node's space separated
      `class` property.
    * Each entry in `attributes` must name a property the node has and, when
      it carries a comparator, pass that comparison:

      1. Exists: `[checked]` only requires the attribute to be present.
      2. Equals: `[checked='no']` requires the whole value to equal `no`.
      3. Contains: `[class~=sample]` or `[class*=sample]` requires a value
         containing `sample`.
      4. Equal to or starts with `value-`: `[class|=sample]` requires a value
         that equals `sample` or starts with `sample-`.
      5. Starts with: `[class^=sample]` requires a value starting with `sample`.
      6. Ends with: `[class$="sample"]` requires a value ending with `sample`.

      For the `class` attribute, comparisons 3-6 are applied to each class
      separately and pass if any class passes. Comparators are accepted both
      as the bare symbol (`^`) and with the trailing `=` (`^=`). An
      unrecognized comparator only requires the attribute to be present.

    Args:
        rule (dict): The rule to apply to the node.
        node (Element): The node to validate.

    Returns:
        bool: Whether the node passes all the parts of the rule.
    """
    # Validate tag
    if rule["tag"] != "*" and rule["tag"] != node.tag:
        return False

    properties = node.properties

    # Validate id; `.get` keeps elements without an id from raising KeyError.
    if rule["id"] is not None and rule["id"] != properties.get("id"):
        return False

    # Validate class list
    if len(rule["classList"]) > 0:
        if "class" not in properties:
            return False
        node_classes = properties["class"].split(" ")
        if any(klass not in node_classes for klass in rule["classList"]):
            return False

    # Validate all attributes; every one must pass, so only failures return early.
    for attr in rule["attributes"]:
        name = attr["name"]
        if name not in properties:
            return False

        compare = attr["compare"]
        if compare is None:
            # Bare `[attr]`: presence is enough, move on to the next attribute.
            continue

        if compare == "=":
            if attr["value"] != properties[name]:
                return False
            continue

        # Rules may carry `^=` or the bare `^`; reduce both to the symbol.
        symbol = compare[:-1] if len(compare) == 2 and compare[1] == "=" else compare
        validator = _ATTR_VALIDATORS.get(symbol)
        if validator is not None and not is_valid_attr(
            attr=properties[name],
            sub=attr["value"],
            validator=validator,
            name=name,
        ):
            return False

    return True


# Comparator symbol -> predicate(actual_value, expected_value).
_ATTR_VALIDATORS = {
    "|": lambda x, y: x == y or x.startswith(f"{y}-"),
    "^": lambda x, y: x.startswith(y),
    "$": lambda x, y: x.endswith(y),
    "*": lambda x, y: y in x,
    "~": lambda x, y: y in x,
}


def is_valid_attr(attr: str, sub: str, validator: Callable, name: str | None = None) -> bool:
    """Validate an attribute value against a string using a validator callable.

    If `name` is a list-like attribute (`class`), the value is split on spaces
    and every item is checked on its own. Otherwise the value is checked as a
    whole.

    Args:
        attr (str): The attribute value found on the node.
        sub (str): The value from the rule to compare against.
        validator (Callable): Called as `validator(item, sub)`; a truthy result
            means the item is valid.
        name (str | None): The attribute's name. When omitted the value is
            never split.

    Returns:
        bool: True if at least one item is valid. False when none are, or when
        `attr` is not a string or `sub` is None, since the string comparisons
        cannot be applied to those.
    """
    if not isinstance(attr, str) or sub is None:
        return False

    list_attributes = ("class",)
    compare_values = attr.split(" ") if name in list_attributes else [attr]

    return any(validator(item, sub) for item in compare_values)
def __parse_specifiers(specifier: str) -> list:
    """Split a specifier into an ordered list of rules.

    Rules:
    * `*` = any element
    * `>` = Everything with certain parent child relationship
    * `+` = first sibling
    * `~` = All after
    * `.` = class
    * `#` = id
    * `[attribute]` = all elements with attribute
    * `[attribute=value]` = all elements with attribute=value
    * `[attribute~=value]` = all elements with attribute containing value
    * `[attribute|=value]` = all elements with attribute=value or attribute starting with value-
    * `node[attribute^=value]` = all elements with attribute starting with value
    * `node[attribute$=value]` = all elements with attribute ending with value
    * `node[attribute*=value]` = all elements with attribute containing value

    Args:
        specifier (str): The specifier to parse.

    Returns:
        list: The rules in the order they appear. `*`, `>`, `+` and `~` are kept
        as plain strings. Every element part becomes a dict with the keys `tag`
        (`*` when no tag is given), `classList`, `id` (None when absent) and
        `attributes`, a list of `{"name", "compare", "value"}` dicts where
        `compare` keeps its `=` (for example `^=`) and is None for a bare
        `[attribute]`.

    Raises:
        Exception: If one element part contains more than one id.
    """
    # Local import under the module name so the builtin `compile` is not shadowed.
    import re

    splitter = re.compile(r"([~>\*+])|(([.#]?[a-zA-Z0-9_-]+)+((\[[^\[\]]+\]))*)|(\[[^\[\]]+\])+")

    el_with_attr = re.compile(r"([.#]?[a-zA-Z0-9_-]+)+(\[[^\[\]]+\])*")
    el_only_attr = re.compile(r"((\[[^\[\]]+\]))+")

    el_classid_from_attr = re.compile(r"([a-zA-Z0-9_#.-]+)((\[.*\])*)")
    el_from_class_from_id = re.compile(r"(#|\.)?([a-zA-Z0-9_-]+)")
    # Quoted values may be empty (`[title=""]`), so the quoted alternatives use `*`.
    attr_compare_val = re.compile(
        r"\[([a-zA-Z0-9_-]+)([~|^$*]?=)?(\"[^\"]*\"|'[^']*'|[^'\"]+)?\]"
    )

    def new_element() -> dict:
        """Create an element rule that matches any tag with no constraints."""
        return {
            "tag": "*",
            "classList": [],
            "id": None,
            "attributes": [],
        }

    def parse_attributes(text: str) -> list:
        """Turn every `[name<compare>value]` group in `text` into an attribute dict."""
        parsed = []
        for found in attr_compare_val.finditer(text):
            name, compare, value = found.groups()
            # Remove only the one pair of quotes that wraps the value, so quote
            # characters that belong to the value itself are preserved.
            if value is not None and len(value) >= 2 and value[0] == value[-1] and value[0] in "'\"":
                value = value[1:-1]
            parsed.append(
                {
                    "name": name,
                    "compare": compare,
                    "value": value,
                }
            )
        return parsed

    tokens = []
    for token in splitter.finditer(specifier):
        text = token.group()

        if text in ("*", ">", "+", "~"):
            tokens.append(text)
        elif el_with_attr.match(text):
            element = new_element()

            # Separate the tag/class/id portion from any trailing attribute groups.
            el_class_id, attrs = el_classid_from_attr.match(text).group(1, 2)

            if attrs not in ("", None):
                element["attributes"] = parse_attributes(attrs)

            if el_class_id not in ("", None):
                for item in el_from_class_from_id.finditer(el_class_id):
                    prefix, name = item.group(1, 2)
                    if prefix == ".":
                        if name not in element["classList"]:
                            element["classList"].append(name)
                    elif prefix == "#":
                        if element["id"] is not None:
                            raise Exception(
                                f"There may only be one id per element specifier.\n{text}"
                            )
                        element["id"] = name
                    else:
                        element["tag"] = name or "*"

            tokens.append(element)
        elif el_only_attr.match(text):
            # Attribute groups with no tag, class or id in front apply to any tag.
            element = new_element()
            element["attributes"] = parse_attributes(text)
            tokens.append(element)

    return tokens