phml.utilities.locate.select
utilities.select
A collection of utilities around querying for specific types of data.
"""utilities.select

A collection of utilities around querying for specific
types of data.
"""

# PERF: Support for all `:` selectors from https://www.w3schools.com/cssref/css_selectors.php
# - Strip all `::` selectors and `:` not supported by phml implementation
# - This will allow for parsing of css selectors and adding scoping to component style elements
#   Add a data-phml-style-scope attribute to matching elements in the components. Edit the selector to then
#   have :is([data-phml-style-scope="phml-<hash>"])<selector>

import re
from typing import Callable

from phml.nodes import Element, Node, Parent
from phml.utilities.travel.travel import walk

__all__ = ["query", "query_all", "matches", "parse_specifiers"]

# Patterns are compiled once at import time instead of on every parser call.

# `.class` / `#id` fragments that follow an optional tag name.
_CLASS_OR_ID = re.compile(r"(#|\.)([\w\-]+)")

# `[name]`, `[name=value]`, `[name^="value"]`, ... attached to a tag/class/id.
_ATTRIBUTE = re.compile(
    r"\[\s*([\w\-:@]+)\s*([\~\|\^\$\*]?=)?\s*(\"[^\"\[\]=]*\"|\'[^\'\[\]=]*\'|[^\s\[\]=\"']+)?\s*\]"
)

# Attribute selectors that stand alone (no tag, class or id). The unquoted
# value alternative excludes brackets so that chained selectors such as
# `[a=b][c=d]` are parsed as two attributes instead of one.
_ATTRIBUTE_ONLY = re.compile(
    r"\[([a-zA-Z0-9_:\-]+)([~|^$*]?=)?(\"[^\"]+\"|'[^']+'|[^'\"\[\]]+)?\]"
)

# Splits a full specifier into combinators, attribute-only selectors and
# compound `tag.class#id[attr]` selectors.
_SPLITTER = re.compile(
    r"([~>\*+])|((?:\[[^\[\]]+\])+)|([^.#\[\]\s]+)?((?:(?:\.|#)[^.#\[\]\s]+)+)?((?:\[[^\[\]]+\])+)?"
)


def query(tree: Parent, specifier: str) -> Element | None:
    """Return the first element in `tree` that matches `specifier`.

    Works like JavaScript's `querySelector`. `#` indicates an id and `.`
    indicates a class. If they are used alone they match anything.
    Any tag can be used by itself or with `#` and/or `.`. You may use
    any number of class specifiers, but only one id specifier per
    tag name. Complex specifiers are allowed, meaning you can have
    space separated specifiers indicating nesting or a parent child
    relationship.

    Rules:
    * `*` = any element
    * `>` = direct child of the current element
    * `+` = first sibling
    * `~` = elements after the current element
    * `.` = class
    * `#` = id
    * `[attribute]` = elements with attribute
    * `[attribute=value]` = elements with attribute=value
    * `[attribute~=value]` = elements with attribute containing value
    * `[attribute|=value]` = elements with attribute=value or attribute starting with value-
    * `[attribute^=value]` = elements with an attribute starting with value
    * `[attribute$=value]` = elements with an attribute ending with value
    * `[attribute*=value]` = elements with an attribute containing value

    Examples:
    * `.some-example` matches the first element with the class `some-example`
    * `#some-example` matches the first element with the id `some-example`
    * `li` matches the first `li` element
    * `li.red` matches the first `li` with the class `red`
    * `li#red` matches the first `li` with the id `red`
    * `input[type="checkbox"]` matches the first `input` with the attribute `type="checkbox"`
    * `div.form-control input[type="checkbox"]` matches the first `input` with the
    attribute `type="checkbox"` that has a parent `div` with the class `form-control`.

    Args:
        tree (Parent): The node whose subtree is searched.
        specifier (str): The CSS-like specifier to match.

    Return:
        Element | None: The first element matching the specifier or None if no element was
        found.
    """

    def position(parent: Parent, node: Node) -> int:
        """Index of `node` inside `parent`, located by identity first so that
        siblings that merely compare equal are never mistaken for `node`."""
        for idx, child in enumerate(parent):
            if child is node:
                return idx
        # Fall back to the container's own lookup if identity finds nothing.
        return parent.index(node)

    def all_nodes(current: Parent, rules: list, include_self: bool = True) -> Element | None:
        """Check every element yielded by `walk(current)`, optionally skipping `current`."""
        for node in walk(current):
            if isinstance(node, Element) and (include_self or node is not current):
                found = branch(node, rules)
                if found is not None:
                    return found
        return None

    def all_children(current: Parent, rules: list) -> Element | None:
        """Check the direct element children of the current node."""
        for node in current:
            if isinstance(node, Element):
                found = branch(node, rules)
                if found is not None:
                    return found
        return None

    def first_sibling(node: Parent, rules: list) -> Element | None:
        """Check the node immediately following `node`, if it is an element."""
        parent = node.parent
        if parent is None:
            return None

        nxt = position(parent, node) + 1
        if nxt < len(parent) and isinstance(parent[nxt], Element):
            return branch(parent[nxt], rules)
        return None

    def all_siblings(current: Parent, rules: list) -> Element | None:
        """Check every element sibling that comes after the current node."""
        parent = current.parent
        if parent is None:
            return None

        for idx in range(position(parent, current) + 1, len(parent)):
            sibling = parent[idx]
            if isinstance(sibling, Element):
                found = branch(sibling, rules)
                if found is not None:
                    return found
        return None

    def process_dict(rules: list, node: Element) -> Element | None:
        """Match `node` against the compound rule `rules[0]`, then continue
        with the remaining rules."""
        if not is_equal(rules[0], node):
            return None
        if len(rules) == 1:
            return node

        # A following compound rule or `*` means "descendant of node";
        # the node itself is never its own descendant.
        if isinstance(rules[1], dict):
            return all_nodes(node, rules[1:], False)
        if rules[1] == "*":
            return all_nodes(node, rules[2:], False)
        return branch(node, rules[1:])

    def branch(node: Node, rules: list) -> Element | None:
        """Based on the current rule, recursively check the nodes.
        If no rules are left, the current node is the match.
        """
        if not isinstance(node, Parent):
            return None
        if not rules:
            return node

        head, rest = rules[0], rules[1:]
        if isinstance(head, dict):
            return process_dict(rules, node) if isinstance(node, Element) else None
        if head == "*":
            return all_nodes(node, rest)
        if head == ">":
            return all_children(node, rest)
        if head == "+":
            return first_sibling(node, rest)
        if head == "~":
            return all_siblings(node, rest)
        return None

    return all_nodes(tree, parse_specifiers(specifier))


def query_all(tree: Parent, specifier: str) -> list[Element]:
    """Return every element in `tree` that matches `specifier`.

    Works like JavaScript's `querySelectorAll`. `#` indicates an id and `.`
    indicates a class. If they are used alone they match anything.
    Any tag can be used by itself or with `#` and/or `.`. You may use
    any number of class specifiers, but only one id specifier per
    tag name. Complex specifiers are allowed, meaning you can have
    space separated specifiers indicating nesting or a parent child
    relationship.

    Rules:
    * `*` = any element
    * `>` = direct child of the current element
    * `+` = first sibling
    * `~` = elements after the current element
    * `.` = class
    * `#` = id
    * `[attribute]` = elements with attribute
    * `[attribute=value]` = elements with attribute=value
    * `[attribute~=value]` = elements with attribute containing value
    * `[attribute|=value]` = elements with attribute=value or attribute starting with value-
    * `[attribute^=value]` = elements with an attribute starting with value
    * `[attribute$=value]` = elements with an attribute ending with value
    * `[attribute*=value]` = elements with an attribute containing value

    Examples:
    * `.some-example` matches all elements with the class `some-example`
    * `#some-example` matches all elements with the id `some-example`
    * `li` matches all `li` elements
    * `li.red` matches all `li` elements with the class `red`
    * `li#red` matches all `li` elements with the id `red`
    * `input[type="checkbox"]` matches all `input` elements with the attribute `type="checkbox"`
    * `div.form-control input[type="checkbox"]` matches all `input` elements with the
    attribute `type="checkbox"` that have a parent `div` with the class `form-control`.

    Args:
        tree (Parent): The node whose subtree is searched.
        specifier (str): The CSS-like specifier to match.

    Return:
        list[Element]: All elements matching the specifier, each listed once in the
        order it was first found, or an empty list if no elements were found.
    """

    def position(parent: Parent, node: Node) -> int:
        """Index of `node` inside `parent`, located by identity first so that
        siblings that merely compare equal are never mistaken for `node`."""
        for idx, child in enumerate(parent):
            if child is node:
                return idx
        # Fall back to the container's own lookup if identity finds nothing.
        return parent.index(node)

    def all_nodes(current: Parent, rules: list, include_self: bool = True) -> list:
        """Collect matches from every element yielded by `walk(current)`."""
        results = []
        for node in walk(current):
            if isinstance(node, Element) and (include_self or node is not current):
                results.extend(branch(node, rules))
        return results

    def all_children(current: Parent, rules: list) -> list:
        """Collect matches from the direct element children of the current node."""
        results = []
        for node in current:
            if isinstance(node, Element):
                results.extend(branch(node, rules))
        return results

    def first_sibling(node: Parent, rules: list) -> list:
        """Collect matches from the node immediately following `node`."""
        parent = node.parent
        if parent is None:
            return []

        nxt = position(parent, node) + 1
        if nxt < len(parent) and isinstance(parent[nxt], Element):
            return branch(parent[nxt], rules)
        return []

    def all_siblings(current: Parent, rules: list) -> list:
        """Collect matches from every element sibling after the current node."""
        parent = current.parent
        if parent is None:
            return []

        results = []
        for idx in range(position(parent, current) + 1, len(parent)):
            sibling = parent[idx]
            if isinstance(sibling, Element):
                results.extend(branch(sibling, rules))
        return results

    def process_dict(rules: list, node: Element) -> list:
        """Match `node` against the compound rule `rules[0]`, then continue
        with the remaining rules."""
        if not is_equal(rules[0], node):
            return []
        if len(rules) == 1:
            return [node]

        # A following compound rule or `*` means "descendant of node";
        # the node itself is never its own descendant (same as `query`).
        if isinstance(rules[1], dict):
            return all_nodes(node, rules[1:], False)
        if rules[1] == "*":
            return all_nodes(node, rules[2:], False)
        return branch(node, rules[1:])

    def branch(node: Node, rules: list) -> list:
        """Based on the current rule, recursively check the nodes.
        If no rules are left, the current node is a match.
        Always returns a list so callers can extend without None checks.
        """
        if not isinstance(node, Parent):
            return []
        if not rules:
            return [node]

        head, rest = rules[0], rules[1:]
        if isinstance(head, dict):
            return process_dict(rules, node) if isinstance(node, Element) else []
        if head == "*":
            return all_nodes(node, rest)
        if head == ">":
            return all_children(node, rest)
        if head == "+":
            return first_sibling(node, rest)
        if head == "~":
            return all_siblings(node, rest)
        return []

    # The same element can be reached through several ancestors
    # (e.g. `div p` with nested divs); report each element only once.
    seen = set()
    unique = []
    for element in all_nodes(tree, parse_specifiers(specifier)):
        if id(element) not in seen:
            seen.add(id(element))
            unique.append(element)
    return unique


def matches(node: Element, specifier: str) -> bool:
    """Check whether `node` matches a single, simple specifier.

    Works the same as the JavaScript `matches`. `#` indicates an id and `.`
    indicates a class. If they are used alone they match anything.
    Any tag can be used by itself or with `#` and/or `.`. You may use
    any number of class specifiers, but only one id specifier per
    tag name. Complex specifiers are not supported. Everything in the specifier
    must relate to one element/tag.

    Rules:
    * `.` = class
    * `#` = id
    * `[attribute]` = elements with attribute
    * `[attribute=value]` = elements with attribute=value
    * `[attribute~=value]` = elements with attribute containing value
    * `[attribute|=value]` = elements with attribute=value or attribute starting with value-
    * `[attribute^=value]` = elements with an attribute starting with value
    * `[attribute$=value]` = elements with an attribute ending with value
    * `[attribute*=value]` = elements with an attribute containing value

    Examples:
    * `.some-example` matches the element with the class `some-example`
    * `#some-example` matches the element with the id `some-example`
    * `li` matches an `li` element
    * `li.red` matches an `li` with the class `red`
    * `li#red` matches an `li` with the id `red`
    * `input[type="checkbox"]` matches the `input` element with the attribute `type="checkbox"`

    Args:
        node (Element): The element to test.
        specifier (str): A specifier describing exactly one element.

    Returns:
        bool: Whether the node satisfies the specifier.

    Raises:
        Exception: If the specifier is empty, contains combinators, or describes
            more than one element.
    """
    rules = parse_specifiers(specifier)

    if len(rules) > 1:
        raise Exception(f"Complex specifier detected and is not allowed.\n{specifier}")
    # An empty specifier parses to no rules at all; report it the same way as
    # any other unusable specifier instead of failing with an IndexError.
    if not rules or not isinstance(rules[0], dict):
        raise Exception(
            "Specifier must only include tag name, classes, id, and or attribute specfiers.\n"
            "Example: `li.red#sample[class^='form-'][title~='sample']`",
        )

    return is_equal(rules[0], node)


def is_equal(rule: dict, node: Node) -> bool:
    """Checks if a rule is valid on a node.
    A rule is a dictionary of possible values and each value must
    be valid on the node.

    A rule may have a tag, id, classList, and attribute list:
    * If the `tag` is provided, the node's `tag` must match the rule's `tag`
    * If the `id` is provided, the node's `id` must match the rule's `id`
    * If the `classList` is not empty, each class in the `classList` must exist in the node's
    class attribute
    * If the `attribute` list is not empty, each attribute in the attribute list will be compared
    against the node's attributes given the `attribute` list's comparators. Below is the list of
    possible comparisons.
        1. Exists: `[checked]` yields any element that has the attribute `checked` no matter its
        value (an attribute explicitly set to the boolean `False` is treated as absent).
        2. Equals: `[checked='no']` yields any element with `checked='no'`
        3. Contains: `[class~=sample]` or `[class*=sample]` yields any element with a class
        containing `sample`
        4. Equal to or starts with value-: `[class|=sample]` yields elements that either have
        a class that equals `sample` or a class that starts with `sample-`
        5. Starts with: `[class^=sample]` yields elements with a class that starts with `sample`
        6. Ends with: `[class$="sample"]` yields elements with a class that ends with `sample`

    Args:
        rule (dict): The rule to apply to the node.
        node (Element): The node to validate.

    Returns:
        bool: Whether the node passes all the rules in the dictionary.
    """
    # Validate tag
    if rule["tag"] != "*" and rule["tag"] != node.tag:
        return False

    # Validate id
    if rule["id"] is not None and ("id" not in node or rule["id"] != node["id"]):
        return False

    # Validate class list: every requested class must be present on the node
    if rule["classList"]:
        if "class" not in node:
            return False
        node_classes = str(node["class"]).split(" ")
        if any(klass not in node_classes for klass in rule["classList"]):
            return False

    # Validate all attributes (vacuously true when the rule lists none)
    return all(
        attr["name"] in node.attributes and __validate_attr(attr, node)
        for attr in rule["attributes"]
    )


def compare_equal(attr: str, c_value: str) -> bool:
    """`[name=value]`: the attribute equals the value."""
    return attr == c_value


def compare_equal_or_start_with_value_dash(attr: str, c_value: str) -> bool:
    """`[name|=value]`: the attribute equals the value or starts with `value-`."""
    return attr == c_value or attr.startswith(f"{c_value}-")


def compare_startswith(attr: str, c_value: str) -> bool:
    """`[name^=value]`: the attribute starts with the value."""
    return attr.startswith(c_value)


def compare_endswith(attr: str, c_value: str) -> bool:
    """`[name$=value]`: the attribute ends with the value."""
    return attr.endswith(c_value)


def compare_contains(attr: str, c_value: str) -> bool:
    """`[name*=value]` / `[name~=value]`: the value occurs inside the attribute."""
    return c_value in attr


def compare_exists(attr: str, _) -> bool:
    """Whether a stringified boolean attribute is `"true"`.

    Kept unchanged for backward compatibility; existence selectors are now
    resolved directly in `__validate_attr`.
    """
    return attr == "true"


# Comparator token -> validator used for `[name<token>value]` selectors.
_ATTRIBUTE_COMPARATORS = {
    "=": compare_equal,
    "|=": compare_equal_or_start_with_value_dash,
    "^=": compare_startswith,
    "$=": compare_endswith,
    "*=": compare_contains,
    "~=": compare_contains,
}


def __validate_attr(attr: dict, node: Element) -> bool:
    """Check one parsed attribute selector against `node`.

    Args:
        attr (dict): Parsed selector with the keys `name`, `compare` and `value`.
        node (Element): The element that already has the attribute `attr["name"]`.

    Returns:
        bool: Whether the node's attribute satisfies the selector.
    """
    raw = node[attr["name"]]
    compare = attr["compare"]

    if not compare:
        # `[name]`: the attribute only has to exist. The parsers report a
        # missing comparator as "" or None depending on the regex API used,
        # so both are accepted. A boolean `False` counts as "not set".
        return not attr["value"] and raw is not False

    validator = _ATTRIBUTE_COMPARATORS.get(compare)
    if validator is None:
        return False

    # Boolean attributes are compared through their lowercase string form.
    attribute = str(raw).lower() if isinstance(raw, bool) else raw
    return is_valid_attr(
        attr=attribute,
        sub=attr["value"],
        name=attr["name"],
        validator=validator,
    )


def is_valid_attr(attr: str, sub: str, name: str, validator: Callable) -> bool:
    """Validate an attribute value with a given string and a validator callable.
    For list-like attributes (`class`) the value is split on spaces and each item
    is checked on its own; otherwise the whole value is checked once.

    Returns:
        True if at least one candidate value passes the validator.
    """
    list_attributes = ["class"]

    candidates = attr.split(" ") if name in list_attributes else [attr]
    return any(validator(item, sub) for item in candidates)


def __parse_el_with_attribute(
    tag: str | None, context: str | None, attributes: str | None
) -> dict:
    """Build a rule dict from the `tag`, `.class#id` and `[attr]` parts of a
    compound selector.

    Raises:
        Exception: If more than one id is given for the element.
    """
    element = {
        "tag": tag or "*",
        "classList": [],
        "id": None,
        "attributes": [],
    }

    if attributes is not None:
        # findall reports unmatched groups as empty strings.
        for name, compare, value in _ATTRIBUTE.findall(attributes):
            element["attributes"].append(
                {
                    "name": name,
                    "compare": compare,
                    "value": value.strip("'\""),
                },
            )

    if context is not None:
        for part in _CLASS_OR_ID.finditer(context):
            marker, ident = part.group(1), part.group(2)
            if marker == ".":
                if ident not in element["classList"]:
                    element["classList"].append(ident)
            elif element["id"] is None:
                element["id"] = ident
            else:
                raise Exception(
                    f"There may only be one id per element specifier. '{(tag or '') + (context or '')}{attributes or ''}'",
                )
    return element


def __parse_attr_only_element(token: str) -> dict:
    """Build a rule dict for a selector made only of `[attr]` parts; it applies
    to any tag."""
    element = {
        "tag": "*",
        "classList": [],
        "id": None,
        "attributes": [],
    }

    if token not in ["", None]:
        for match in _ATTRIBUTE_ONLY.finditer(token):
            name, compare, value = match.groups()
            # groups() reports unmatched parts as None; normalise to "" so the
            # rule has the same shape as the one from __parse_el_with_attribute.
            element["attributes"].append(
                {
                    "name": name,
                    "compare": compare or "",
                    "value": (value or "").strip("'\""),
                },
            )

    return element


def parse_specifiers(specifier: str) -> list:
    """Split a specifier into an ordered list of rules.

    Combinators (`*`, `>`, `+`, `~`) are kept as strings; every element
    selector becomes a dict with the keys `tag`, `classList`, `id` and
    `attributes`.

    Rules:
    * `*` = any element
    * `>` = direct child of the current element
    * `+` = first sibling
    * `~` = elements after the current element
    * `.` = class
    * `#` = id
    * `[attribute]` = elements with attribute
    * `[attribute=value]` = elements with attribute=value
    * `[attribute~=value]` = elements with attribute containing value
    * `[attribute|=value]` = elements with attribute=value or attribute starting with value-
    * `[attribute^=value]` = elements with an attribute starting with value
    * `[attribute$=value]` = elements with an attribute ending with value
    * `[attribute*=value]` = elements with an attribute containing value
    """
    tokens = []
    for token in _SPLITTER.finditer(specifier):
        sibling, just_attributes, tag, context, attributes = token.groups()
        if sibling is not None:
            tokens.append(sibling)
        elif tag is not None or context is not None or attributes is not None:
            tokens.append(__parse_el_with_attribute(tag, context, attributes))
        elif just_attributes is not None:
            tokens.append(__parse_attr_only_element(just_attributes))
        # Otherwise the pattern matched an empty span (e.g. whitespace): skip.
    return tokens
def query(tree: Parent, specifier: str) -> Element | None:
    """Return the first element in `tree` that matches `specifier`.

    Works like JavaScript's `querySelector`. `#` indicates an id and `.`
    indicates a class. If they are used alone they match anything.
    Any tag can be used by itself or with `#` and/or `.`. You may use
    any number of class specifiers, but only one id specifier per
    tag name. Complex specifiers are allowed, meaning you can have
    space separated specifiers indicating nesting or a parent child
    relationship.

    Rules:
    * `*` = any element
    * `>` = direct child of the current element
    * `+` = first sibling
    * `~` = elements after the current element
    * `.` = class
    * `#` = id
    * `[attribute]` = elements with attribute
    * `[attribute=value]` = elements with attribute=value
    * `[attribute~=value]` = elements with attribute containing value
    * `[attribute|=value]` = elements with attribute=value or attribute starting with value-
    * `[attribute^=value]` = elements with an attribute starting with value
    * `[attribute$=value]` = elements with an attribute ending with value
    * `[attribute*=value]` = elements with an attribute containing value

    Examples:
    * `.some-example` matches the first element with the class `some-example`
    * `#some-example` matches the first element with the id `some-example`
    * `li` matches the first `li` element
    * `li.red` matches the first `li` with the class `red`
    * `li#red` matches the first `li` with the id `red`
    * `input[type="checkbox"]` matches the first `input` with the attribute `type="checkbox"`
    * `div.form-control input[type="checkbox"]` matches the first `input` with the
    attribute `type="checkbox"` that has a parent `div` with the class `form-control`.

    Args:
        tree (Parent): The node whose subtree is searched.
        specifier (str): The CSS-like specifier to match.

    Return:
        Element | None: The first element matching the specifier or None if no element was
        found.
    """

    def position(parent: Parent, node: Node) -> int:
        """Index of `node` inside `parent`, located by identity first so that
        siblings that merely compare equal are never mistaken for `node`."""
        for idx, child in enumerate(parent):
            if child is node:
                return idx
        # Fall back to the container's own lookup if identity finds nothing.
        return parent.index(node)

    def all_nodes(current: Parent, rules: list, include_self: bool = True) -> Element | None:
        """Check every element yielded by `walk(current)`, optionally skipping `current`."""
        for node in walk(current):
            if isinstance(node, Element) and (include_self or node is not current):
                found = branch(node, rules)
                if found is not None:
                    return found
        return None

    def all_children(current: Parent, rules: list) -> Element | None:
        """Check the direct element children of the current node."""
        for node in current:
            if isinstance(node, Element):
                found = branch(node, rules)
                if found is not None:
                    return found
        return None

    def first_sibling(node: Parent, rules: list) -> Element | None:
        """Check the node immediately following `node`, if it is an element."""
        parent = node.parent
        if parent is None:
            return None

        nxt = position(parent, node) + 1
        if nxt < len(parent) and isinstance(parent[nxt], Element):
            return branch(parent[nxt], rules)
        return None

    def all_siblings(current: Parent, rules: list) -> Element | None:
        """Check every element sibling that comes after the current node."""
        parent = current.parent
        if parent is None:
            return None

        for idx in range(position(parent, current) + 1, len(parent)):
            sibling = parent[idx]
            if isinstance(sibling, Element):
                found = branch(sibling, rules)
                if found is not None:
                    return found
        return None

    def process_dict(rules: list, node: Element) -> Element | None:
        """Match `node` against the compound rule `rules[0]`, then continue
        with the remaining rules."""
        if not is_equal(rules[0], node):
            return None
        if len(rules) == 1:
            return node

        # A following compound rule or `*` means "descendant of node";
        # the node itself is never its own descendant.
        if isinstance(rules[1], dict):
            return all_nodes(node, rules[1:], False)
        if rules[1] == "*":
            return all_nodes(node, rules[2:], False)
        return branch(node, rules[1:])

    def branch(node: Node, rules: list) -> Element | None:
        """Based on the current rule, recursively check the nodes.
        If no rules are left, the current node is the match.
        """
        if not isinstance(node, Parent):
            return None
        if not rules:
            return node

        head, rest = rules[0], rules[1:]
        if isinstance(head, dict):
            return process_dict(rules, node) if isinstance(node, Element) else None
        if head == "*":
            return all_nodes(node, rest)
        if head == ">":
            return all_children(node, rest)
        if head == "+":
            return first_sibling(node, rest)
        if head == "~":
            return all_siblings(node, rest)
        return None

    return all_nodes(tree, parse_specifiers(specifier))
Same as JavaScript's `querySelector`. `#` indicates an id and `.` indicates a class. If they are used alone they match anything.
Any tag can be used by itself or with `#` and/or `.`. You may use
any number of class specifiers, but may only use one id specifier per
tag name. Complex specifiers are allowed, meaning you can
have space-separated specifiers indicating nesting or a parent-child
relationship.
Rules:
* `*` = any element
* `>` = direct child of the current element
* `+` = first sibling
* `~` = elements after the current element
* `.` = class
* `#` = id
* `[attribute]` = elements with attribute
* `[attribute=value]` = elements with attribute=value
* `[attribute~=value]` = elements with attribute containing value
* `[attribute|=value]` = elements with attribute=value or attribute starting with value-
* `[attribute^=value]` = elements with an attribute starting with value
* `[attribute$=value]` = elements with an attribute ending with value
* `[attribute*=value]` = elements with an attribute containing value
Examples:
* `.some-example` matches the first element with the class `some-example`
* `#some-example` matches the first element with the id `some-example`
* `li` matches the first `li` element
* `li.red` matches the first `li` with the class `red`
* `li#red` matches the first `li` with the id `red`
* `input[type="checkbox"]` matches the first `input` with the attribute `type="checkbox"`
* `div.form-control input[type="checkbox"]` matches the first `input` with the attribute `type="checkbox"` that has a parent `div` with the class `form-control`.
Return:
`Element | None`: The first element matching the specifier, or `None` if no element was found.
def query_all(tree: Parent, specifier: str) -> list[Element]:
    """Return every element in `tree` that matches `specifier`.

    Works like JavaScript's `querySelectorAll`. `#` indicates an id and `.`
    indicates a class. If they are used alone they match anything.
    Any tag can be used by itself or with `#` and/or `.`. You may use
    any number of class specifiers, but only one id specifier per
    tag name. Complex specifiers are allowed, meaning you can have
    space separated specifiers indicating nesting or a parent child
    relationship.

    Rules:
    * `*` = any element
    * `>` = direct child of the current element
    * `+` = first sibling
    * `~` = elements after the current element
    * `.` = class
    * `#` = id
    * `[attribute]` = elements with attribute
    * `[attribute=value]` = elements with attribute=value
    * `[attribute~=value]` = elements with attribute containing value
    * `[attribute|=value]` = elements with attribute=value or attribute starting with value-
    * `[attribute^=value]` = elements with an attribute starting with value
    * `[attribute$=value]` = elements with an attribute ending with value
    * `[attribute*=value]` = elements with an attribute containing value

    Examples:
    * `.some-example` matches all elements with the class `some-example`
    * `#some-example` matches all elements with the id `some-example`
    * `li` matches all `li` elements
    * `li.red` matches all `li` elements with the class `red`
    * `li#red` matches all `li` elements with the id `red`
    * `input[type="checkbox"]` matches all `input` elements with the attribute `type="checkbox"`
    * `div.form-control input[type="checkbox"]` matches all `input` elements with the
    attribute `type="checkbox"` that have a parent `div` with the class `form-control`.

    Args:
        tree (Parent): The node whose subtree is searched.
        specifier (str): The CSS-like specifier to match.

    Return:
        list[Element]: All elements matching the specifier, each listed once in the
        order it was first found, or an empty list if no elements were found.
    """

    def position(parent: Parent, node: Node) -> int:
        """Index of `node` inside `parent`, located by identity first so that
        siblings that merely compare equal are never mistaken for `node`."""
        for idx, child in enumerate(parent):
            if child is node:
                return idx
        # Fall back to the container's own lookup if identity finds nothing.
        return parent.index(node)

    def all_nodes(current: Parent, rules: list, include_self: bool = True) -> list:
        """Collect matches from every element yielded by `walk(current)`."""
        results = []
        for node in walk(current):
            if isinstance(node, Element) and (include_self or node is not current):
                results.extend(branch(node, rules))
        return results

    def all_children(current: Parent, rules: list) -> list:
        """Collect matches from the direct element children of the current node."""
        results = []
        for node in current:
            if isinstance(node, Element):
                results.extend(branch(node, rules))
        return results

    def first_sibling(node: Parent, rules: list) -> list:
        """Collect matches from the node immediately following `node`."""
        parent = node.parent
        if parent is None:
            return []

        nxt = position(parent, node) + 1
        if nxt < len(parent) and isinstance(parent[nxt], Element):
            return branch(parent[nxt], rules)
        return []

    def all_siblings(current: Parent, rules: list) -> list:
        """Collect matches from every element sibling after the current node."""
        parent = current.parent
        if parent is None:
            return []

        results = []
        for idx in range(position(parent, current) + 1, len(parent)):
            sibling = parent[idx]
            if isinstance(sibling, Element):
                results.extend(branch(sibling, rules))
        return results

    def process_dict(rules: list, node: Element) -> list:
        """Match `node` against the compound rule `rules[0]`, then continue
        with the remaining rules."""
        if not is_equal(rules[0], node):
            return []
        if len(rules) == 1:
            return [node]

        # A following compound rule or `*` means "descendant of node";
        # the node itself is never its own descendant (same as `query`).
        if isinstance(rules[1], dict):
            return all_nodes(node, rules[1:], False)
        if rules[1] == "*":
            return all_nodes(node, rules[2:], False)
        return branch(node, rules[1:])

    def branch(node: Node, rules: list) -> list:
        """Based on the current rule, recursively check the nodes.
        If no rules are left, the current node is a match.
        Always returns a list so callers can extend without None checks.
        """
        if not isinstance(node, Parent):
            return []
        if not rules:
            return [node]

        head, rest = rules[0], rules[1:]
        if isinstance(head, dict):
            return process_dict(rules, node) if isinstance(node, Element) else []
        if head == "*":
            return all_nodes(node, rest)
        if head == ">":
            return all_children(node, rest)
        if head == "+":
            return first_sibling(node, rest)
        if head == "~":
            return all_siblings(node, rest)
        return []

    # The same element can be reached through several ancestors
    # (e.g. `div p` with nested divs); report each element only once.
    seen = set()
    unique = []
    for element in all_nodes(tree, parse_specifiers(specifier)):
        if id(element) not in seen:
            seen.add(id(element))
            unique.append(element)
    return unique
Same as JavaScript's `querySelectorAll`. `#` indicates an id and `.` indicates a class. If they are used alone they match anything.
Any tag can be used by itself or with `#` and/or `.`. You may use
any number of class specifiers, but may only use one id specifier per
tag name. Complex specifiers are allowed, meaning you can
have space-separated specifiers indicating nesting or a parent-child
relationship.
Rules:
* `*` = any element
* `>` = direct child of the current element
* `+` = first sibling
* `~` = elements after the current element
* `.` = class
* `#` = id
* `[attribute]` = elements with attribute
* `[attribute=value]` = elements with attribute=value
* `[attribute~=value]` = elements with attribute containing value
* `[attribute|=value]` = elements with attribute=value or attribute starting with value-
* `[attribute^=value]` = elements with an attribute starting with value
* `[attribute$=value]` = elements with an attribute ending with value
* `[attribute*=value]` = elements with an attribute containing value
Examples:
* `.some-example` matches all elements with the class `some-example`
* `#some-example` matches all elements with the id `some-example`
* `li` matches all `li` elements
* `li.red` matches all `li` elements with the class `red`
* `li#red` matches all `li` elements with the id `red`
* `input[type="checkbox"]` matches all `input` elements with the attribute `type="checkbox"`
* `div.form-control input[type="checkbox"]` matches all `input` elements with the attribute `type="checkbox"` that have a parent `div` with the class `form-control`.
Return:
list[Element] | None: All elements matching the specifier, or an empty list if no elements were found.
def matches(node: Element, specifier: str) -> bool:
    """Works the same as the Javascript matches. `#` indicates an id and `.`
    indicates a class. If they are used alone they match anything.
    Any tag can be used by itself or with `#` and/or `.`. You may use
    any number of class specifiers, but may only use one id specifier per
    tag name. Complex specifiers are not supported. Everything in the specifier
    must relate to one element/tag.

    Rules:
    * `.` = class
    * `#` = id
    * `[attribute]` = elements with attribute
    * `[attribute=value]` = elements with attribute=value
    * `[attribute~=value]` = elements with attribute containing value
    * `[attribute|=value]` = elements with attribute=value or attribute starting with value-
    * `[attribute^=value]` = elements with an attribute starting with value
    * `[attribute$=value]` = elements with an attribute ending with value
    * `[attribute*=value]` = elements with an attribute containing value

    Examples:
    * `.some-example` matches the element with the class `some-example`
    * `#some-example` matches the element with the id `some-example`
    * `li` matches an `li` element
    * `li.red` matches an `li` with the class `red`
    * `li#red` matches an `li` with the id `red`
    * `input[type="checkbox"]` matches the `input` element with the attribute `type="checkbox"`

    Args:
        node (Element): The element to test against the specifier.
        specifier (str): A specifier describing exactly one element/tag.

    Return:
        bool: The result of `is_equal` for the single parsed rule and `node`.

    Raises:
        Exception: If the specifier is empty, parses to more than one rule
            (a complex specifier), or parses to something other than a single
            element rule (for example a lone `>` or `*`).
    """

    rules = parse_specifiers(specifier)

    # An empty or whitespace-only specifier parses to no rules at all. Report
    # that explicitly instead of failing with an IndexError on `rules[0]`.
    if not rules:
        raise Exception(f"Empty specifier detected and is not allowed.\n{specifier!r}")
    if len(rules) > 1:
        raise Exception(f"Complex specifier detected and is not allowed.\n{specifier}")
    # Combinator tokens are plain strings; only a dict describes an element.
    if not isinstance(rules[0], dict):
        raise Exception(
            "Specifier must only include tag name, classes, id, and or attribute specfiers.\n"
            "Example: `li.red#sample[class^='form-'][title~='sample']`",
        )

    return is_equal(rules[0], node)
Works the same as the JavaScript `matches`. `#` indicates an id and `.` indicates a class.
If they are used alone they match anything. Any tag can be used by itself or with `#` and/or `.`.
You may use any number of class specifiers, but may only use one id specifier per tag name.
Complex specifiers are not supported. Everything in the specifier must relate to one element/tag.
Rules:
* `.` = class
* `#` = id
* `[attribute]` = elements with attribute
* `[attribute=value]` = elements with attribute=value
* `[attribute~=value]` = elements with attribute containing value
* `[attribute|=value]` = elements with attribute=value or attribute starting with value-
* `[attribute^=value]` = elements with an attribute starting with value
* `[attribute$=value]` = elements with an attribute ending with value
* `[attribute*=value]` = elements with an attribute containing value
Examples:
* `.some-example` matches the element with the class `some-example`
* `#some-example` matches the element with the id `some-example`
* `li` matches an `li` element
* `li.red` matches an `li` with the class `red`
* `li#red` matches an `li` with the id `red`
* `input[type="checkbox"]` matches the `input` element with the attribute `type="checkbox"`
def parse_specifiers(specifier: str) -> list:
    """Split a css-like specifier into an ordered list of rule tokens.

    Combinators (`*`, `>`, `+`, `~`) are emitted as plain strings. Every other
    non-empty piece of the specifier is passed to one of the element parsing
    helpers and whatever that helper returns is appended as the token.

    Rules:
    * `*` = any element
    * `>` = direct child of the current element
    * `+` = first sibling
    * `~` = elements after the current element
    * `.` = class
    * `#` = id
    * `[attribute]` = elements with attribute
    * `[attribute=value]` = elements with attribute=value
    * `[attribute~=value]` = elements with attribute containing value
    * `[attribute|=value]` = elements with attribute=value or attribute starting with value-
    * `[attribute^=value]` = elements with an attribute starting with value
    * `[attribute$=value]` = elements with an attribute ending with value
    * `[attribute*=value]` = elements with an attribute containing value

    Args:
        specifier (str): The specifier text to tokenize.

    Return:
        list: The tokens in the order they appear in `specifier`. The list is
            empty when the specifier holds nothing but whitespace.
    """
    # Alternatives, in order: a lone combinator, a run of attribute brackets
    # with no tag, or an optional tag + optional class/id chain + optional
    # attribute brackets.
    pattern = re.compile(
        r"([~>\*+])|((?:\[[^\[\]]+\])+)|([^.#\[\]\s]+)?((?:(?:\.|#)[^.#\[\]\s]+)+)?((?:\[[^\[\]]+\])+)?"
    )
    combinators = ("*", ">", "+", "~")

    parsed = []
    for match in pattern.finditer(specifier):
        combinator, attrs_only, tag, context, attributes = match.groups()

        if combinator in combinators:
            parsed.append(combinator)
            continue

        # The last alternative is fully optional, so the regex also yields
        # empty matches (e.g. on whitespace); those fall through untouched.
        if any(part is not None for part in (tag, context, attributes)):
            parsed.append(__parse_el_with_attribute(tag, context, attributes))
        elif attrs_only is not None:
            parsed.append(__parse_attr_only_element(attrs_only))
    return parsed
Rules:
* `*` = any element
* `>` = direct child of the current element
* `+` = first sibling
* `~` = elements after the current element
* `.` = class
* `#` = id
* `[attribute]` = elements with attribute
* `[attribute=value]` = elements with attribute=value
* `[attribute~=value]` = elements with attribute containing value
* `[attribute|=value]` = elements with attribute=value or attribute starting with value-
* `[attribute^=value]` = elements with an attribute starting with value
* `[attribute$=value]` = elements with an attribute ending with value
* `[attribute*=value]` = elements with an attribute containing value