phml.utils.locate.select
utils.select
A collection of utilities around querying for specific types of data.
"""utils.select

A collection of utilities around querying for specific
types of data.
"""

import re
from typing import Callable

from phml.nodes import AST, Element, Root
from phml.utils.travel import visit_children, walk

__all__ = ["query", "query_all", "matches", "parse_specifiers"]


def query(tree: AST | Root | Element, specifier: str) -> Element | None:
    """Return the first element matching `specifier`, like JavaScript's `querySelector`.

    `#` indicates an id and `.` indicates a class. Used without a tag name they
    match any tag. Any tag can be used by itself or with `#` and/or `.`. You may
    use any number of class specifiers, but only one id specifier per tag name.
    Complex specifiers are allowed: space separated specifiers indicate nesting,
    and the `>`, `+`, `~` and `*` tokens are handled as well.

    Examples:
    * `.some-example` matches the first element with the class `some-example`
    * `#some-example` matches the first element with the id `some-example`
    * `li` matches the first `li` element
    * `li.red` matches the first `li` with the class `red`
    * `li#red` matches the first `li` with the id `red`
    * `input[type="checkbox"]` matches the first `input` with the attribute `type="checkbox"`
    * `div.form-control input[type="checkbox"]` matches the first `input` with the
    attribute `type="checkbox"` that is nested inside a `div` with the class `form-control`.

    Args:
        tree (AST | Root | Element): Where to search. For an `AST` its `tree` attribute is used.
        specifier (str): The specifier, parsed with `parse_specifiers`.

    Return:
        Element | None: The first element matching the specifier or None if no element was
        found.
    """

    def following_siblings(node: Element) -> list:
        """Return the nodes listed after `node` in its parent's children."""
        if node.parent is None:
            return []
        siblings = node.parent.children
        # Locate the node by identity so an earlier sibling that merely compares
        # equal (if nodes define structural equality) is not mistaken for it.
        for idx, sibling in enumerate(siblings):
            if sibling is node:
                return siblings[idx + 1 :]
        return []

    def descendants(node, include_self: bool = True):
        """Yield what `walk(node)` yields, optionally skipping `node` itself."""
        return (found for found in walk(node) if include_self or found is not node)

    def first_match(candidates, rules: list):
        """Return the first non-None `branch` result among the element candidates."""
        for candidate in candidates:
            if candidate.type == "element":
                found = branch(candidate, rules)
                if found is not None:
                    return found
        return None

    def branch(node: Element, rules: list):  # pylint: disable=too-many-return-statements
        """Apply the first rule to `node` and recurse on the remaining rules.

        Once every rule is consumed the current node is the match.
        """
        if not rules:
            return node

        head, rest = rules[0], rules[1:]

        if isinstance(head, dict):
            if not is_equal(head, node):
                return None
            # A lone (or final) element rule: the node itself is the answer.
            # Looking at `rest[0]` unconditionally raised IndexError here.
            if not rest:
                return node
            # Two element rules in a row describe nesting; the nested element
            # must be a real descendant, never the matched node itself.
            if isinstance(rest[0], dict):
                return first_match(descendants(node, False), rest)
            if rest[0] == "*":
                return first_match(descendants(node, False), rest[1:])
            return branch(node, rest)

        if head == "*":
            return first_match(descendants(node), rest)

        if head == ">":
            return first_match(visit_children(node), rest)

        if head == "+":
            # Only the node directly after this one is considered.
            return first_match(following_siblings(node)[:1], rest)

        if head == "~":
            return first_match(following_siblings(node), rest)

        return None

    if isinstance(tree, AST):
        tree = tree.tree

    return first_match(descendants(tree), parse_specifiers(specifier))


def query_all(tree: AST | Root | Element, specifier: str) -> list[Element]:
    """Return every element matching `specifier`, like JavaScript's `querySelectorAll`.

    `#` indicates an id and `.` indicates a class. Used without a tag name they
    match any tag. Any tag can be used by itself or with `#` and/or `.`. You may
    use any number of class specifiers, but only one id specifier per tag name.
    Complex specifiers are allowed: space separated specifiers indicate nesting,
    and the `>`, `+`, `~` and `*` tokens are handled as well.

    Examples:
    * `.some-example` matches all elements with the class `some-example`
    * `#some-example` matches all elements with the id `some-example`
    * `li` matches all `li` elements
    * `li.red` matches all `li` elements with the class `red`
    * `li#red` matches all `li` elements with the id `red`
    * `input[type="checkbox"]` matches all `input` elements with the attribute
    `type="checkbox"`
    * `div.form-control input[type="checkbox"]` matches all `input` elements with the
    attribute `type="checkbox"` that are nested inside a `div` with the class `form-control`.

    Args:
        tree (AST | Root | Element): Where to search. For an `AST` its `tree` attribute is used.
        specifier (str): The specifier, parsed with `parse_specifiers`.

    Return:
        list[Element]: All elements matching the specifier, each listed once, or an empty
        list if no elements were found.
    """

    def following_siblings(node: Element) -> list:
        """Return the nodes listed after `node` in its parent's children."""
        if node.parent is None:
            return []
        siblings = node.parent.children
        # Locate the node by identity so an earlier sibling that merely compares
        # equal (if nodes define structural equality) is not mistaken for it.
        for idx, sibling in enumerate(siblings):
            if sibling is node:
                return siblings[idx + 1 :]
        return []

    def descendants(node, include_self: bool = True):
        """Yield what `walk(node)` yields, optionally skipping `node` itself."""
        return (found for found in walk(node) if include_self or found is not node)

    def collect(candidates, rules: list) -> list:
        """Gather the `branch` results of every element candidate."""
        found = []
        for candidate in candidates:
            if candidate.type == "element":
                found.extend(branch(candidate, rules))
        return found

    def branch(node: Element, rules: list) -> list:  # pylint: disable=too-many-return-statements
        """Apply the first rule to `node` and recurse on the remaining rules.

        Always returns a list (empty when nothing matches) so callers can extend
        their results without checking for None.
        """
        if not rules:
            return [node]

        head, rest = rules[0], rules[1:]

        if isinstance(head, dict):
            if not is_equal(head, node):
                return []
            # A lone (or final) element rule: the node itself is the answer.
            # Looking at `rest[0]` unconditionally raised IndexError here.
            if not rest:
                return [node]
            # Two element rules in a row describe nesting; the nested element
            # must be a real descendant, never the matched node itself.
            if isinstance(rest[0], dict):
                return collect(descendants(node, False), rest)
            if rest[0] == "*":
                return collect(descendants(node, False), rest[1:])
            return branch(node, rest)

        if head == "*":
            return collect(descendants(node), rest)

        if head == ">":
            return collect(visit_children(node), rest)

        if head == "+":
            # Only the node directly after this one is considered.
            return collect(following_siblings(node)[:1], rest)

        if head == "~":
            return collect(following_siblings(node), rest)

        return []

    if isinstance(tree, AST):
        tree = tree.tree

    matched = collect(descendants(tree), parse_specifiers(specifier))

    # The same element can be reached through several matching ancestors;
    # report each element once, in first-seen order. Identity is used so the
    # elements do not need to be hashable.
    unique, seen = [], set()
    for element in matched:
        if id(element) not in seen:
            seen.add(id(element))
            unique.append(element)
    return unique


def matches(node: Element, specifier: str) -> bool:
    """Check a single element against a specifier, like JavaScript's `matches`.

    `#` indicates an id and `.` indicates a class. Used without a tag name they
    match any tag. Any tag can be used by itself or with `#` and/or `.`. You may
    use any number of class specifiers, but only one id specifier per tag name.
    Complex specifiers are not supported. Everything in the specifier must relate
    to one element/tag.

    Examples:
    * `.some-example` matches an element with the class `some-example`
    * `#some-example` matches an element with the id `some-example`
    * `li` matches an `li` element
    * `li.red` matches an `li` with the class `red`
    * `li#red` matches an `li` with the id `red`
    * `input[type="checkbox"]` matches an `input` with the attribute `type="checkbox"`

    Args:
        node (Element): The element to test.
        specifier (str): A specifier describing exactly one element.

    Returns:
        bool: Whether `node` satisfies the specifier.

    Raises:
        ValueError: If the specifier is empty, complex, or not an element specifier.
    """

    rules = parse_specifiers(specifier)

    # An empty or unparsable specifier used to surface as a bare IndexError.
    if len(rules) == 0:
        raise ValueError(f"No element specifier found.\n{specifier}")
    if len(rules) > 1:
        raise ValueError(f"Complex specifier detected and is not allowed.\n{specifier}")
    if not isinstance(rules[0], dict):
        raise ValueError(
            "Specifier must only include tag name, classes, id, and or attribute specfiers.\n\
Example: `li.red#sample[class^='form-'][title~='sample']`"
        )

    return is_equal(rules[0], node)


def is_equal(rule: dict, node: Element) -> bool:
    """Checks if a rule is valid on a node.
    A rule is a dictionary of possible values and each value must
    be valid on the node.

    A rule may have a tag, id, classList, and attribute list:
    * If the `tag` is provided, the node's `tag` must match the rule's `tag`
    * If the `id` is provided, the node's `id` must match the rule's `id`
    * If the `classList` is not empty, each class in the `classList` must exist in the node's
    class attribute
    * If the `attribute` list is not empty, each attribute in the attribute list will be compared
    against the node's attributes given the `attribute` list's comparators. Below is the list of
    possible comparisons.
        1. Exists: `[checked]` yields any element that has the attribute `checked` no matter its
        value.
        2. Equals: `[checked='no']` yields any element with `checked='no'`
        3. Contains: `[class~=sample]` or `[class*=sample]` yields any element with a class
        containing `sample`
        4. Equal to or startswith value-: `[class|=sample]` yields elements that either have
        a class that equals `sample` or a class that starts with `sample-`
        5. Starts with: `[class^=sample]` yields elements with a class that starts with `sample`
        6. Ends with: `[class$="sample"]` yields elements with a class that ends with `sample`

    Args:
        rule (dict): The rule to apply to the node.
        node (Element): The node to validate.

    Returns:
        bool: Whether the node passes all the rules in the dictionary.
    """

    # Validate tag
    if rule["tag"] != "*" and rule["tag"] != node.tag:
        return False

    # Validate id; a node without an id can never satisfy an id rule
    # (a plain lookup raised KeyError for such nodes).
    if rule["id"] is not None and rule["id"] != node.properties.get("id"):
        return False

    # Validate class list
    if rule["classList"]:
        if "class" not in node.properties:
            return False
        node_classes = node.properties["class"].split(" ")
        if any(klass not in node_classes for klass in rule["classList"]):
            return False

    # Validate all attributes: the attribute must exist and, when a comparison
    # was given, pass it. The previous `... or True` made every comparison pass.
    return all(
        attr["name"] in node.properties
        and (attr["compare"] is None or __validate_attr(attr, node))
        for attr in rule["attributes"]
    )


def __validate_attr(attr: dict, node: Element) -> bool:
    """Apply one attribute comparison from a rule to the node's attribute value."""
    actual = node.properties[attr["name"]]
    expected = attr["value"]

    # The parser stores the operator together with its `=` (`^=`, `$=`, ...).
    # Reduce it to the distinguishing character; a bare `=` stays `=`.
    operator = attr["compare"].rstrip("=") or "="

    if operator == "=":
        return expected == actual

    validators = {
        "|": lambda value, sub: value == sub or value.startswith(f"{sub}-"),
        "^": lambda value, sub: value.startswith(sub),
        "$": lambda value, sub: value.endswith(sub),
        "*": lambda value, sub: sub in value,
        "~": lambda value, sub: sub in value,
    }
    validator = validators.get(operator)
    if validator is None:
        return True

    # Substring style comparisons need text on both sides; an empty expected
    # value would otherwise match everything.
    if not expected or not isinstance(actual, str):
        return False

    return is_valid_attr(attr=actual, sub=expected, validator=validator, name=attr["name"])


def is_valid_attr(attr: str, sub: str, validator: Callable, name: str | None = None) -> bool:
    """Validate an attribute value with a given string and a validator callable.

    If `name` is a list-like attribute (currently only `class`) the value is split
    on spaces and each item is checked on its own. Otherwise the whole value is
    checked as one item.

    Args:
        attr (str): The attribute value found on the node.
        sub (str): The value from the specifier to compare against.
        validator (Callable): Called as `validator(item, sub)` for each item.
        name (str | None): The attribute name, used to decide whether to split the value.

    Returns:
        True if at least one item passes the validator.
    """
    list_attributes = ["class"]

    compare_values = attr.split(" ") if name in list_attributes else [attr]

    return any(validator(item, sub) for item in compare_values)


def __parse_attributes(text: str) -> list[dict]:
    """Turn every `[name]` / `[name<op>value]` group in `text` into an attribute rule."""
    attr_compare_val = re.compile(r"\[([a-zA-Z0-9_-]+)([~|^$*]?=)?(\"[^\"]+\"|'[^']+'|[^'\"]+)?\]")

    attributes = []
    for found in attr_compare_val.finditer(text):
        name, compare, value = found.groups()
        if value is not None:
            value = value.strip("'\"")
        attributes.append({"name": name, "compare": compare, "value": value})
    return attributes


def __parse_el_with_attribute(token: str) -> dict:
    """Parse a token with a tag, class and/or id part, optionally followed by attributes.

    Raises:
        ValueError: If the token contains more than one id.
    """
    el_classid_from_attr = re.compile(r"([a-zA-Z0-9_#.-]+)((\[.*\])*)")
    el_from_class_from_id = re.compile(r"(#|\.)?([a-zA-Z0-9_-]+)")

    element = {
        "tag": "*",
        "classList": [],
        "id": None,
        "attributes": [],
    }

    res = el_classid_from_attr.match(token)
    el_class_id, attrs = res.group(1), res.group(2)

    if attrs:
        element["attributes"] = __parse_attributes(attrs)

    for item in el_from_class_from_id.finditer(el_class_id):
        prefix, name = item.groups()
        if prefix == ".":
            if name not in element["classList"]:
                element["classList"].append(name)
        elif prefix == "#":
            if element["id"] is not None:
                # `token` is a plain string; calling `.group()` on it hid this
                # message behind an AttributeError.
                raise ValueError(f"There may only be one id per element specifier.\n{token}")
            element["id"] = name
        else:
            element["tag"] = name or "*"

    return element


def __parse_attr_only_element(token: str) -> dict:
    """Parse a token made only of attribute groups; it applies to any tag."""
    return {
        "tag": "*",
        "classList": [],
        "id": None,
        "attributes": __parse_attributes(token) if token else [],
    }


def parse_specifiers(specifier: str) -> list:
    """Split a specifier into an ordered list of rules.

    Rules:
    * `*` = any element
    * `>` = Everything with certain parent child relationship
    * `+` = first sibling
    * `~` = All after
    * `.` = class
    * `#` = id
    * `[attribute]` = all elements with attribute
    * `[attribute=value]` = all elements with attribute=value
    * `[attribute~=value]` = all elements with attribute containing value
    * `[attribute|=value]` = all elements with attribute=value or attribute starting with value-
    * `node[attribute^=value]` = all elements with attribute starting with value
    * `node[attribute$=value]` = all elements with attribute ending with value
    * `node[attribute*=value]` = all elements with attribute containing value

    Args:
        specifier (str): The specifier to parse. Text that matches none of the
            patterns (such as whitespace) is skipped.

    Returns:
        list: The tokens in order. `*`, `>`, `+` and `~` stay strings; every element
        specifier becomes a dict with the keys `tag`, `classList`, `id` and `attributes`.
    """

    splitter = re.compile(r"([~>\*+])|(([.#]?[a-zA-Z0-9_-]+)+((\[[^\[\]]+\]))*)|(\[[^\[\]]+\])+")

    el_only_attr = re.compile(r"((\[[^\[\]]+\]))+")
    el_with_attr = re.compile(r"([.#]?[a-zA-Z0-9_-]+)+(\[[^\[\]]+\])*")

    tokens = []
    for token in splitter.finditer(specifier):
        text = token.group()
        if text in ["*", ">", "+", "~"]:
            tokens.append(text)
        elif el_with_attr.match(text):
            tokens.append(__parse_el_with_attribute(text))
        elif el_only_attr.match(text):
            tokens.append(__parse_attr_only_element(text))

    return tokens
def query(tree: AST | Root | Element, specifier: str) -> Element | None:
    """Return the first element matching `specifier`, like JavaScript's `querySelector`.

    `#` indicates an id and `.` indicates a class. Used without a tag name they
    match any tag. Any tag can be used by itself or with `#` and/or `.`. You may
    use any number of class specifiers, but only one id specifier per tag name.
    Complex specifiers are allowed: space separated specifiers indicate nesting,
    and the `>`, `+`, `~` and `*` tokens are handled as well.

    Examples:
    * `.some-example` matches the first element with the class `some-example`
    * `#some-example` matches the first element with the id `some-example`
    * `li` matches the first `li` element
    * `li.red` matches the first `li` with the class `red`
    * `li#red` matches the first `li` with the id `red`
    * `input[type="checkbox"]` matches the first `input` with the attribute `type="checkbox"`
    * `div.form-control input[type="checkbox"]` matches the first `input` with the
    attribute `type="checkbox"` that is nested inside a `div` with the class `form-control`.

    Args:
        tree (AST | Root | Element): Where to search. For an `AST` its `tree` attribute is used.
        specifier (str): The specifier, parsed with `parse_specifiers`.

    Return:
        Element | None: The first element matching the specifier or None if no element was
        found.
    """

    def following_siblings(node: Element) -> list:
        """Return the nodes listed after `node` in its parent's children."""
        if node.parent is None:
            return []
        siblings = node.parent.children
        # Locate the node by identity so an earlier sibling that merely compares
        # equal (if nodes define structural equality) is not mistaken for it.
        for idx, sibling in enumerate(siblings):
            if sibling is node:
                return siblings[idx + 1 :]
        return []

    def descendants(node, include_self: bool = True):
        """Yield what `walk(node)` yields, optionally skipping `node` itself."""
        return (found for found in walk(node) if include_self or found is not node)

    def first_match(candidates, rules: list):
        """Return the first non-None `branch` result among the element candidates."""
        for candidate in candidates:
            if candidate.type == "element":
                found = branch(candidate, rules)
                if found is not None:
                    return found
        return None

    def branch(node: Element, rules: list):  # pylint: disable=too-many-return-statements
        """Apply the first rule to `node` and recurse on the remaining rules.

        Once every rule is consumed the current node is the match.
        """
        if not rules:
            return node

        head, rest = rules[0], rules[1:]

        if isinstance(head, dict):
            if not is_equal(head, node):
                return None
            # A lone (or final) element rule: the node itself is the answer.
            # Looking at `rest[0]` unconditionally raised IndexError here.
            if not rest:
                return node
            # Two element rules in a row describe nesting; the nested element
            # must be a real descendant, never the matched node itself.
            if isinstance(rest[0], dict):
                return first_match(descendants(node, False), rest)
            if rest[0] == "*":
                return first_match(descendants(node, False), rest[1:])
            return branch(node, rest)

        if head == "*":
            return first_match(descendants(node), rest)

        if head == ">":
            return first_match(visit_children(node), rest)

        if head == "+":
            # Only the node directly after this one is considered.
            return first_match(following_siblings(node)[:1], rest)

        if head == "~":
            return first_match(following_siblings(node), rest)

        return None

    if isinstance(tree, AST):
        tree = tree.tree

    return first_match(descendants(tree), parse_specifiers(specifier))
Same as JavaScript's `querySelector`. `#` indicates an id and `.` indicates a class. Used without a tag name, they match elements with any tag.
Any tag can be used by itself or with `#` and/or `.`. You may use
any number of class specifiers, but only one id specifier per
tag name. Complex specifiers are allowed, meaning you can
have space-separated specifiers indicating nesting or a parent-child
relationship.
Examples:
- `.some-example` matches the first element with the class `some-example`
- `#some-example` matches the first element with the id `some-example`
- `li` matches the first `li` element
- `li.red` matches the first `li` with the class `red`
- `li#red` matches the first `li` with the id `red`
- `input[type="checkbox"]` matches the first `input` with the attribute `type="checkbox"`
- `div.form-control input[type="checkbox"]` matches the first `input` with the attribute `type="checkbox"` that is nested inside a `div` with the class `form-control`.
Return:
`Element | None`: The first element matching the specifier, or `None` if no element was found.
def query_all(tree: AST | Root | Element, specifier: str) -> list[Element]:
    """Return every element matching `specifier`, like JavaScript's `querySelectorAll`.

    `#` indicates an id and `.` indicates a class. Used without a tag name they
    match any tag. Any tag can be used by itself or with `#` and/or `.`. You may
    use any number of class specifiers, but only one id specifier per tag name.
    Complex specifiers are allowed: space separated specifiers indicate nesting,
    and the `>`, `+`, `~` and `*` tokens are handled as well.

    Examples:
    * `.some-example` matches all elements with the class `some-example`
    * `#some-example` matches all elements with the id `some-example`
    * `li` matches all `li` elements
    * `li.red` matches all `li` elements with the class `red`
    * `li#red` matches all `li` elements with the id `red`
    * `input[type="checkbox"]` matches all `input` elements with the attribute
    `type="checkbox"`
    * `div.form-control input[type="checkbox"]` matches all `input` elements with the
    attribute `type="checkbox"` that are nested inside a `div` with the class `form-control`.

    Args:
        tree (AST | Root | Element): Where to search. For an `AST` its `tree` attribute is used.
        specifier (str): The specifier, parsed with `parse_specifiers`.

    Return:
        list[Element]: All elements matching the specifier, each listed once, or an empty
        list if no elements were found.
    """

    def following_siblings(node: Element) -> list:
        """Return the nodes listed after `node` in its parent's children."""
        if node.parent is None:
            return []
        siblings = node.parent.children
        # Locate the node by identity so an earlier sibling that merely compares
        # equal (if nodes define structural equality) is not mistaken for it.
        for idx, sibling in enumerate(siblings):
            if sibling is node:
                return siblings[idx + 1 :]
        return []

    def descendants(node, include_self: bool = True):
        """Yield what `walk(node)` yields, optionally skipping `node` itself."""
        return (found for found in walk(node) if include_self or found is not node)

    def collect(candidates, rules: list) -> list:
        """Gather the `branch` results of every element candidate."""
        found = []
        for candidate in candidates:
            if candidate.type == "element":
                found.extend(branch(candidate, rules))
        return found

    def branch(node: Element, rules: list) -> list:  # pylint: disable=too-many-return-statements
        """Apply the first rule to `node` and recurse on the remaining rules.

        Always returns a list (empty when nothing matches) so callers can extend
        their results without checking for None.
        """
        if not rules:
            return [node]

        head, rest = rules[0], rules[1:]

        if isinstance(head, dict):
            if not is_equal(head, node):
                return []
            # A lone (or final) element rule: the node itself is the answer.
            # Looking at `rest[0]` unconditionally raised IndexError here.
            if not rest:
                return [node]
            # Two element rules in a row describe nesting; the nested element
            # must be a real descendant, never the matched node itself.
            if isinstance(rest[0], dict):
                return collect(descendants(node, False), rest)
            if rest[0] == "*":
                return collect(descendants(node, False), rest[1:])
            return branch(node, rest)

        if head == "*":
            return collect(descendants(node), rest)

        if head == ">":
            return collect(visit_children(node), rest)

        if head == "+":
            # Only the node directly after this one is considered.
            return collect(following_siblings(node)[:1], rest)

        if head == "~":
            return collect(following_siblings(node), rest)

        return []

    if isinstance(tree, AST):
        tree = tree.tree

    matched = collect(descendants(tree), parse_specifiers(specifier))

    # The same element can be reached through several matching ancestors;
    # report each element once, in first-seen order. Identity is used so the
    # elements do not need to be hashable.
    unique, seen = [], set()
    for element in matched:
        if id(element) not in seen:
            seen.add(id(element))
            unique.append(element)
    return unique
Same as JavaScript's `querySelectorAll`. `#` indicates an id and `.` indicates a class. Used without a tag name, they match elements with any tag.
Any tag can be used by itself or with `#` and/or `.`. You may use
any number of class specifiers, but only one id specifier per
tag name. Complex specifiers are allowed, meaning you can
have space-separated specifiers indicating nesting or a parent-child
relationship.
Examples:
- `.some-example` matches all elements with the class `some-example`
- `#some-example` matches all elements with the id `some-example`
- `li` matches all `li` elements
- `li.red` matches all `li` elements with the class `red`
- `li#red` matches all `li` elements with the id `red`
- `input[type="checkbox"]` matches all `input` elements with the attribute `type="checkbox"`
- `div.form-control input[type="checkbox"]` matches all `input` elements with the attribute `type="checkbox"` that are nested inside a `div` with the class `form-control`.
Return:
`list[Element]`: All elements matching the specifier, or an empty list if no elements were found.
def matches(node: Element, specifier: str) -> bool:
    """Check a single element against a specifier, like JavaScript's `matches`.

    `#` indicates an id and `.` indicates a class. Used without a tag name they
    match any tag. Any tag can be used by itself or with `#` and/or `.`. You may
    use any number of class specifiers, but only one id specifier per tag name.
    Complex specifiers are not supported. Everything in the specifier must relate
    to one element/tag.

    Examples:
    * `.some-example` matches an element with the class `some-example`
    * `#some-example` matches an element with the id `some-example`
    * `li` matches an `li` element
    * `li.red` matches an `li` with the class `red`
    * `li#red` matches an `li` with the id `red`
    * `input[type="checkbox"]` matches an `input` with the attribute `type="checkbox"`

    Args:
        node (Element): The element to test.
        specifier (str): A specifier describing exactly one element.

    Returns:
        bool: Whether `node` satisfies the specifier.

    Raises:
        ValueError: If the specifier is empty, complex, or not an element specifier.
    """

    rules = parse_specifiers(specifier)

    # An empty or unparsable specifier used to surface as a bare IndexError.
    if len(rules) == 0:
        raise ValueError(f"No element specifier found.\n{specifier}")
    if len(rules) > 1:
        raise ValueError(f"Complex specifier detected and is not allowed.\n{specifier}")
    if not isinstance(rules[0], dict):
        raise ValueError(
            "Specifier must only include tag name, classes, id, and or attribute specfiers.\n\
Example: `li.red#sample[class^='form-'][title~='sample']`"
        )

    return is_equal(rules[0], node)
Works the same as JavaScript's `matches`. `#` indicates an id and `.` indicates a class. Used without a tag name, they match elements with any tag.
Any tag can be used by itself or with `#` and/or `.`. You may use
any number of class specifiers, but only one id specifier per
tag name. Complex specifiers are not supported. Everything in the specifier
must relate to one element/tag.
Examples:
- `.some-example` matches an element with the class `some-example`
- `#some-example` matches an element with the id `some-example`
- `li` matches an `li` element
- `li.red` matches an `li` with the class `red`
- `li#red` matches an `li` with the id `red`
- `input[type="checkbox"]` matches an `input` with the attribute `type="checkbox"`
def parse_specifiers(specifier: str) -> list:
    """Split a specifier into an ordered list of rules.

    Rules:
    * `*` = any element
    * `>` = Everything with certain parent child relationship
    * `+` = first sibling
    * `~` = All after
    * `.` = class
    * `#` = id
    * `[attribute]` = all elements with attribute
    * `[attribute=value]` = all elements with attribute=value
    * `[attribute~=value]` = all elements with attribute containing value
    * `[attribute|=value]` = all elements with attribute=value or attribute starting with value-
    * `node[attribute^=value]` = all elements with attribute starting with value
    * `node[attribute$=value]` = all elements with attribute ending with value
    * `node[attribute*=value]` = all elements with attribute containing value

    Args:
        specifier (str): The specifier to parse. Text that matches none of the
            patterns (such as whitespace) is skipped.

    Returns:
        list: The tokens in order. `*`, `>`, `+` and `~` stay strings; every other
        token is whatever the element parsing helpers return for it.
    """

    splitter = re.compile(r"([~>\*+])|(([.#]?[a-zA-Z0-9_-]+)+((\[[^\[\]]+\]))*)|(\[[^\[\]]+\])+")

    el_only_attr = re.compile(r"((\[[^\[\]]+\]))+")
    el_with_attr = re.compile(r"([.#]?[a-zA-Z0-9_-]+)+(\[[^\[\]]+\])*")

    tokens = []
    for token in splitter.finditer(specifier):
        text = token.group()
        if text in ["*", ">", "+", "~"]:
            tokens.append(text)
        elif el_with_attr.match(text):
            tokens.append(__parse_el_with_attribute(text))
        elif el_only_attr.match(text):
            tokens.append(__parse_attr_only_element(text))

    return tokens
Rules:
- `*` = any element
- `>` = Everything with certain parent child relationship
- `+` = first sibling
- `~` = All after
- `.` = class
- `#` = id
- `[attribute]` = all elements with attribute
- `[attribute=value]` = all elements with attribute=value
- `[attribute~=value]` = all elements with attribute containing value
- `[attribute|=value]` = all elements with attribute=value or attribute starting with value-
- `node[attribute^=value]` = all elements with attribute starting with value
- `node[attribute$=value]` = all elements with attribute ending with value
- `node[attribute*=value]` = all elements with attribute containing value