Coverage for phml\utilities\locate\select.py: 100%
214 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-04-12 14:26 -0500
« prev ^ index » next coverage.py v6.5.0, created at 2023-04-12 14:26 -0500
1"""utilities.select
3A collection of utilities around querying for specific
4types of data.
5"""
7import re
8from typing import Callable
10from phml.nodes import Element, Node, Parent
11from phml.utilities.travel.travel import walk
13__all__ = ["query", "query_all", "matches", "parse_specifiers"]
def query(tree: Parent, specifier: str) -> Element | None:
    """Return the first element matching `specifier`, like javascript's querySelector.

    `#` indicates an id and `.` indicates a class. If they are used alone they
    match any tag. Any tag can be used by itself or with `#` and/or `.`. You may
    use any number of class specifiers, but only one id specifier per tag name.
    Complex specifiers are allowed, meaning you can have space separated
    specifiers indicating nesting or a parent child relationship.

    Rules:
    * `*` = any element
    * `>` = direct child of the current element
    * `+` = first sibling
    * `~` = elements after the current element
    * `.` = class
    * `#` = id
    * `[attribute]` = elements with attribute
    * `[attribute=value]` = elements with attribute=value
    * `[attribute~=value]` = elements with attribute containing value
    * `[attribute|=value]` = elements with attribute=value or attribute starting with value-
    * `[attribute^=value]` = elements with an attribute starting with value
    * `[attribute$=value]` = elements with an attribute ending with value
    * `[attribute*=value]` = elements with an attribute containing value

    Examples:
    * `.some-example` matches the first element with the class `some-example`
    * `#some-example` matches the first element with the id `some-example`
    * `li` matches the first `li` element
    * `li.red` matches the first `li` with the class `red`
    * `li#red` matches the first `li` with the id `red`
    * `input[type="checkbox"]` matches the first `input` with the attribute `type="checkbox"`
    * `div.form-control input[type="checkbox"]` matches the first `input` with the
    attribute `type="checkbox"` that has a parent `div` with the class `form-control`.

    Return:
        Element | None: The first element matching the specifier or None if no element was
        found.
    """

    def all_nodes(current: Parent, rules: list, include_self: bool = True):
        """Search every element yielded by walking `current`; stop at the first hit."""
        for candidate in walk(current):
            if not isinstance(candidate, Element):
                continue
            if not (include_self or candidate != current):
                continue
            found = branch(candidate, rules)
            if found is not None:
                return found
        return None

    def all_children(current: Parent, rules: list):
        """Search only the direct element children of `current`."""
        for child in current:
            if isinstance(child, Element):
                found = branch(child, rules)
                if found is not None:
                    return found
        return None

    def first_sibling(node: Parent, rules: list):
        """Check only the node immediately following `node` in its parent."""
        parent = node.parent
        if parent is None:
            return None

        following = parent.index(node) + 1
        if following < len(parent) and isinstance(parent[following], Element):
            return branch(parent[following], rules)
        return None

    def all_siblings(current: Parent, rules: list):
        """Search every element positioned after `current` in its parent."""
        parent = current.parent
        if parent is None:
            return None

        for position in range(parent.index(current) + 1, len(parent)):
            sibling = parent[position]
            if isinstance(sibling, Element):
                found = branch(sibling, rules)
                if found is not None:
                    return found
        return None

    def process_dict(rules: list, node: Element):
        """Apply an element rule to `node`, then continue with the remaining rules."""
        if not is_equal(rules[0], node):
            return None

        # The element rule was the last one: this node is the answer.
        if len(rules) == 1:
            return node

        upcoming = rules[1]
        if isinstance(upcoming, dict):
            # Two element rules in a row: search the walk of `node`, skipping itself.
            return all_nodes(node, rules[1:], False)
        if upcoming == "*":
            # The `*` token is consumed here; the rules after it are applied
            # to every element below `node`.
            return all_nodes(node, rules[2:], False)

        return branch(node, rules[1:])

    def branch(node: Node, rules: list):
        """Based on the current rule, recursively check the nodes.
        If no rules remain, the current node is the match.
        """
        if not isinstance(node, Parent):
            return None

        if not rules:
            return node

        head, remaining = rules[0], rules[1:]
        if isinstance(head, dict) and isinstance(node, Element):
            return process_dict(rules, node)

        # Only string tokens can be combinators; a dict rule on a non-element
        # parent (for example the tree root) matches nothing.
        handler = combinators.get(head) if isinstance(head, str) else None
        if handler is None:
            return None
        return handler(node, remaining)

    combinators = {
        "*": all_nodes,
        ">": all_children,
        "+": first_sibling,
        "~": all_siblings,
    }

    return all_nodes(tree, parse_specifiers(specifier))
def query_all(tree: Parent, specifier: str) -> list[Element]:
    """Return every element matching `specifier`, like javascript's querySelectorAll.

    `#` indicates an id and `.` indicates a class. If they are used alone they
    match any tag. Any tag can be used by itself or with `#` and/or `.`. You may
    use any number of class specifiers, but only one id specifier per tag name.
    Complex specifiers are allowed, meaning you can have space separated
    specifiers indicating nesting or a parent child relationship.

    Rules:
    * `*` = any element
    * `>` = direct child of the current element
    * `+` = first sibling
    * `~` = elements after the current element
    * `.` = class
    * `#` = id
    * `[attribute]` = elements with attribute
    * `[attribute=value]` = elements with attribute=value
    * `[attribute~=value]` = elements with attribute containing value
    * `[attribute|=value]` = elements with attribute=value or attribute starting with value-
    * `[attribute^=value]` = elements with an attribute starting with value
    * `[attribute$=value]` = elements with an attribute ending with value
    * `[attribute*=value]` = elements with an attribute containing value

    Examples:
    * `.some-example` matches all elements with the class `some-example`
    * `#some-example` matches all elements with the id `some-example`
    * `li` matches all `li` elements
    * `li.red` matches all `li` elements with the class `red`
    * `li#red` matches all `li` elements with the id `red`
    * `input[type="checkbox"]` matches all `input` elements with the attribute
    `type="checkbox"`
    * `div.form-control input[type="checkbox"]` matches all `input` elements with the
    attribute `type="checkbox"` that are nested in a `div` with the class `form-control`.

    Return:
        list[Element]: All elements matching the specifier, each listed once in the order
        it was first found, or an empty list if no elements were found.
    """

    def all_nodes(current: Parent, rules: list, include_self: bool = True):
        """Collect matches from every element yielded by walking `current`."""
        results = []
        for node in walk(current):
            if isinstance(node, Element) and (include_self or node != current):
                results.extend(branch(node, rules) or [])
        return results

    def all_children(current: Parent, rules: list):
        """Collect matches from the direct element children of `current`."""
        results = []
        for node in current:
            if isinstance(node, Element):
                results.extend(branch(node, rules) or [])
        return results

    def first_sibling(node: Parent, rules: list):
        """Collect matches from the node immediately following `node`."""
        parent = node.parent
        if parent is None:
            return []

        idx = parent.index(node) + 1
        if idx < len(parent) and parent[idx].type == "element":
            return branch(parent[idx], rules) or []
        return []

    def all_siblings(current: Parent, rules: list):
        """Collect matches from every element positioned after `current`."""
        parent = current.parent
        if parent is None:
            return []

        results = []
        for idx in range(parent.index(current) + 1, len(parent)):
            if parent[idx].type == "element":
                results.extend(branch(parent[idx], rules) or [])
        return results

    def process_dict(rules: list, node: Element):
        """Apply an element rule to `node`, then continue with the remaining rules."""
        if not is_equal(rules[0], node):
            return []

        if len(rules) == 1:
            return [node]

        if isinstance(rules[1], dict):
            # Descendant relationship: `node` is the ancestor and must not be
            # tested against the next rule itself, otherwise `div div` would
            # match a single `div`. This mirrors what `query` does.
            return all_nodes(node, rules[1:], False)
        if rules[1] == "*":
            # The `*` token is consumed here; the rules after it are applied
            # to every element below `node`.
            return all_nodes(node, rules[2:], False)

        return branch(node, rules[1:])

    def branch(node: Node, rules: list):  # pylint: disable=too-many-return-statements
        """Based on the current rule, recursively check the nodes.
        If no rules remain, the current node is a match.
        """
        if not isinstance(node, Parent):
            return []

        if len(rules) == 0:
            return [node]

        if isinstance(rules[0], dict) and isinstance(node, Element):
            return process_dict(rules, node)

        if rules[0] == "*":
            return all_nodes(node, rules[1:])

        if rules[0] == ">":
            return all_children(node, rules[1:])

        if rules[0] == "+":
            return first_sibling(node, rules[1:])

        if rules[0] == "~":
            return all_siblings(node, rules[1:])

        return []

    matched = all_nodes(tree, parse_specifiers(specifier))

    # The same element can be reached through several ancestors (or through a
    # leading `*`). Keep the first occurrence only, comparing by identity so two
    # distinct elements that merely compare equal are both kept.
    seen = set()
    unique = []
    for element in matched:
        marker = id(element)
        if marker not in seen:
            seen.add(marker)
            unique.append(element)
    return unique
def matches(node: Element, specifier: str) -> bool:
    """Check whether `node` satisfies `specifier`, like javascript's matches.

    `#` indicates an id and `.` indicates a class. If they are used alone they
    match any tag. Any tag can be used by itself or with `#` and/or `.`. You may
    use any number of class specifiers, but only one id specifier per tag name.
    Complex specifiers are not supported. Everything in the specifier must
    relate to one element/tag.

    Rules:
    * `.` = class
    * `#` = id
    * `[attribute]` = elements with attribute
    * `[attribute=value]` = elements with attribute=value
    * `[attribute~=value]` = elements with attribute containing value
    * `[attribute|=value]` = elements with attribute=value or attribute starting with value-
    * `[attribute^=value]` = elements with an attribute starting with value
    * `[attribute$=value]` = elements with an attribute ending with value
    * `[attribute*=value]` = elements with an attribute containing value

    Examples:
    * `.some-example` matches the element with the class `some-example`
    * `#some-example` matches the element with the id `some-example`
    * `li` matches an `li` element
    * `li.red` matches an `li` with the class `red`
    * `li#red` matches an `li` with the id `red`
    * `input[type="checkbox"]` matches the `input` element with the attribute `type="checkbox"`

    Raises:
        Exception: If the specifier is empty, contains more than one token, or its
            single token is a combinator instead of an element description.
    """

    rules = parse_specifiers(specifier)

    # An empty or whitespace-only specifier yields no tokens; report that
    # explicitly instead of failing with an IndexError on `rules[0]`.
    if len(rules) == 0:
        raise Exception("Specifier must not be empty.")

    if len(rules) > 1:
        raise Exception(f"Complex specifier detected and is not allowed.\n{specifier}")

    if not isinstance(rules[0], dict):
        raise Exception(
            "Specifier must only include tag name, classes, id, and or attribute specfiers.\n"
            "Example: `li.red#sample[class^='form-'][title~='sample']`",
        )

    return is_equal(rules[0], node)
def is_equal(rule: dict, node: Node) -> bool:
    """Check whether every constraint in `rule` holds for `node`.

    A rule is a dictionary with the keys `tag`, `id`, `classList` and
    `attributes`:
    * `tag`: must equal the node's `tag` unless it is `*`.
    * `id`: when not None, the node must have an `id` equal to it.
    * `classList`: every listed class must appear in the node's whitespace
      separated `class` attribute.
    * `attributes`: every entry must name an attribute present on the node, and
      that entry must pass `__validate_attr`, which applies the entry's
      comparator (`=`, `|=`, `^=`, `$=`, `*=`, `~=` or a bare `[name]`).

    Args:
        rule (dict): The rule to apply to the node.
        node (Element): The node to validate.

    Returns:
        bool: Whether the node passes all the constraints in the rule.
    """
    # Validate tag
    if rule["tag"] != "*" and rule["tag"] != node.tag:
        return False

    # Validate id
    if rule["id"] is not None and ("id" not in node or rule["id"] != node["id"]):
        return False

    # Validate class list
    if len(rule["classList"]) > 0:
        if "class" not in node:
            return False
        # Split once, on any whitespace: class lists may be separated by tabs,
        # newlines or repeated spaces, not only by a single space.
        node_classes = str(node["class"]).split()
        if any(klass not in node_classes for klass in rule["classList"]):
            return False

    # Validate all attributes (vacuously true when the rule lists none)
    return all(
        attr["name"] in node.attributes and __validate_attr(attr, node)
        for attr in rule["attributes"]
    )
def compare_equal(attr: str, c_value: str) -> bool:
    """Return whether the attribute value is exactly `c_value`."""
    is_same = attr == c_value
    return is_same
def compare_equal_or_start_with_value_dash(attr: str, c_value: str) -> bool:
    """Return whether the attribute value is `c_value` or begins with `c_value-`."""
    if attr == c_value:
        return True
    return attr.startswith(c_value + "-")
def compare_startswith(attr: str, c_value: str) -> bool:
    """Return whether the attribute value begins with `c_value`."""
    has_prefix = attr.startswith(c_value)
    return has_prefix
def compare_endswith(attr: str, c_value: str) -> bool:
    """Return whether the attribute value ends with `c_value`."""
    has_suffix = attr.endswith(c_value)
    return has_suffix
def compare_contains(attr: str, c_value: str) -> bool:
    """Return whether `c_value` occurs anywhere inside the attribute value."""
    is_present = c_value in attr
    return is_present
def compare_exists(attr: str, _) -> bool:
    """Return whether the attribute value is the string `"true"`.

    The second argument is ignored; it is accepted so this function has the
    same two-argument shape as the other comparators.
    """
    return "true" == attr
def __validate_attr(attr: dict, node: Element):
    """Check one parsed attribute rule against the matching attribute on `node`.

    Args:
        attr (dict): Parsed attribute rule with the keys `name`, `compare` and
            `value`.
        node (Element): The node whose attribute `attr["name"]` is read. The
            attribute is read unconditionally, so callers must make sure it is
            present first.

    Returns:
        bool: Whether the attribute satisfies the rule. An unrecognised
        comparator never matches.
    """
    attribute = node[attr["name"]]

    # Bare `[name]`: no comparator and no value (empty string or None, depending
    # on which parser produced the rule). Any value counts as "present" except a
    # boolean False; previously only values that stringified to "true" matched,
    # so `[href]` never matched a regular string attribute.
    if not attr["compare"] and not attr["value"]:
        return attribute is not False

    # Booleans are compared through their lowercase string form.
    if isinstance(attribute, bool):
        attribute = str(attribute).lower()

    validators = {
        "=": compare_equal,
        "|=": compare_equal_or_start_with_value_dash,
        "^=": compare_startswith,
        "$=": compare_endswith,
        "*=": compare_contains,
        "~=": compare_contains,
    }

    validator = validators.get(attr["compare"])
    if validator is None:
        return False

    return is_valid_attr(
        attr=attribute,
        sub=attr["value"],
        name=attr["name"],
        validator=validator,
    )
def is_valid_attr(attr: str, sub: str, name: str, validator: Callable) -> bool:
    """Validate an attribute value against `sub` using a validator callable.

    For the `class` attribute the value is split on whitespace and each class is
    checked on its own; for every other attribute the whole value is checked as
    a single item.

    Args:
        attr (str): The attribute value found on the node.
        sub (str): The value from the specifier to compare against.
        name (str): The attribute name; decides whether the value is a list.
        validator (Callable): Called as `validator(item, sub)` for each item.

    Returns:
        True if the validator accepts at least one item.
    """
    list_attributes = ["class"]

    if name in list_attributes:
        # Split on any whitespace run so tabs, newlines and repeated spaces
        # separate classes too. An empty class list is still compared as one
        # empty item.
        compare_values = attr.split() or [""]
    else:
        compare_values = [attr]

    return any(validator(item, sub) for item in compare_values)
458def __parse_el_with_attribute(
459 tag: str | None, context: str | None, attributes: str | None
460) -> dict:
461 el_from_class_from_id = re.compile(r"(#|\.)([\w\-]+)")
463 attr_compare_val = re.compile(
464 r"\[\s*([\w\-:@]+)\s*([\~\|\^\$\*]?=)?\s*(\"[^\"\[\]=]*\"|\'[^\'\[\]=]*\'|[^\s\[\]=\"']+)?\s*\]"
465 )
466 re.compile(r"\[\s*([\w\-:@]+)\]")
468 element = {
469 "tag": tag or "*",
470 "classList": [],
471 "id": None,
472 "attributes": [],
473 }
475 if attributes is not None:
476 for attr in attr_compare_val.findall(attributes):
477 name, compare, value = attr
478 if value is not None:
479 value = value.lstrip("'\"").rstrip("'\"")
480 element["attributes"].append(
481 {
482 "name": name,
483 "compare": compare,
484 "value": value,
485 },
486 )
488 if context is not None:
489 for part in el_from_class_from_id.finditer(context):
490 if part.group(1) == ".":
491 if part.group(2) not in element["classList"]:
492 element["classList"].append(part.group(2))
493 elif part.group(1) == "#":
494 if element["id"] is None:
495 element["id"] = part.group(2)
496 else:
497 raise Exception(
498 f"There may only be one id per element specifier. '{(tag or '') + (context or '')}{attributes or ''}'",
499 )
500 return element
def __parse_attr_only_element(token: str) -> dict:
    """Build an any-tag element rule from a token made only of `[...]` parts.

    Args:
        token: One or more attribute specifiers, e.g. `[type=checkbox][checked]`.
            An empty string or None yields a rule without attributes.

    Returns:
        dict: A rule with `tag` set to `*`, an empty `classList`, no `id`, and
        one `attributes` entry (`name`, `compare`, `value`) per bracket pair.
        `compare` and `value` are empty strings when the specifier omitted them.
    """
    # Unquoted values must stop at a bracket; otherwise `[a=b][c=d]` is read
    # as a single attribute `a` with the value `b][c=d`.
    attr_compare_val = re.compile(
        r"\[([a-zA-Z0-9_:\-]+)([~|^$*]?=)?(\"[^\"]+\"|'[^']+'|[^'\"\[\]]+)?\]"
    )

    element = {
        "tag": "*",
        "classList": [],
        "id": None,
        "attributes": [],
    }

    if token:
        for match in attr_compare_val.finditer(token):
            # Missing groups become "" rather than None so a bare `[name]`
            # carries an empty comparator and value.
            name, compare, value = match.groups(default="")
            element["attributes"].append(
                {
                    "name": name,
                    "compare": compare,
                    "value": value.strip("'\""),
                },
            )

    return element
533def parse_specifiers(specifier: str) -> list:
534 """
535 Rules:
536 * `*` = any element
537 * `>` = direct child of the current element
538 * `+` = first sibling
539 * `~` = elements after the current element
540 * `.` = class
541 * `#` = id
542 * `[attribute]` = elements with attribute
543 * `[attribute=value]` = elements with attribute=value
544 * `[attribute~=value]` = elements with attribute containing value
545 * `[attribute|=value]` = elements with attribute=value or attribute starting with value-
546 * `[attribute^=value]` = elements with an attribute starting with value
547 * `[attribute$=value]` = elements with an attribute ending with value
548 * `[attribute*=value]` = elements with an attribute containing value
549 """
550 splitter = re.compile(
551 r"([~>\*+])|((?:\[[^\[\]]+\])+)|([^.#\[\]\s]+)?((?:(?:\.|#)[^.#\[\]\s]+)+)?((?:\[[^\[\]]+\])+)?"
552 )
554 tokens = []
555 for token in splitter.finditer(specifier):
556 (
557 sibling,
558 just_attributes,
559 tag,
560 context,
561 attributes,
562 ) = token.groups()
563 if sibling in ["*", ">", "+", "~"]:
564 tokens.append(sibling)
565 elif tag is not None or context is not None or attributes is not None:
566 tokens.append(__parse_el_with_attribute(tag, context, attributes))
567 elif just_attributes is not None:
568 tokens.append(__parse_attr_only_element(just_attributes))
569 return tokens