Coverage for phml\utils\locate\select.py: 72%

217 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2022-11-30 09:38 -0600

1"""utils.select 

2 

3A collection of utilities around querying for specific 

4types of data. 

5""" 

6 

7from typing import Callable 

8 

9from phml.nodes import AST, Element, Root 

10from phml.utils.travel import visit_children, walk 

11 

12__all__ = ["query", "queryAll", "matches"] 

13 

14 

def query(tree: AST | Root | Element, specifier: str) -> Element | None:
    """Return the first element matching `specifier`, like JavaScript's querySelector.

    `#` indicates an id and `.` indicates a class. If they are used alone they
    match any tag. Any tag can be used by itself or with `#` and/or `.`. You may
    use any number of class specifiers, but only one id specifier per tag name.
    Complex specifiers are allowed: space separated specifiers express nesting
    (ancestor/descendant), `>` a direct child, `+` the immediately following
    sibling and `~` any following sibling.

    Examples:
    * `.some-example` matches the first element with the class `some-example`
    * `#some-example` matches the first element with the id `some-example`
    * `li` matches the first `li` element
    * `li.red` matches the first `li` with the class `red`
    * `li#red` matches the first `li` with the id `red`
    * `input[type="checkbox"]` matches the first `input` with the attribute `type="checkbox"`
    * `div.form-control input[type="checkbox"]` matches the first `input` with the
    attribute `type="checkbox"` that is nested inside a `div` with the class `form-control`.

    Args:
        tree: The tree to search. When an `AST` is given its `tree` attribute is searched.
        specifier: The selector string.

    Return:
        Element | None: The first element matching the specifier or None if no element was
        found.
    """
    if isinstance(tree, AST):
        tree = tree.tree

    rules = __parse_specifiers(specifier)

    def first_match(candidates, rules: list):
        """Run `branch` over every element candidate and return the first hit."""
        for candidate in candidates:
            if candidate.type == "element":
                found = branch(candidate, rules)
                if found is not None:
                    return found
        return None

    def following_siblings(node: Element) -> list:
        """Return the nodes that come after `node` in its parent's children."""
        parent = node.parent
        if parent is None:
            return []
        siblings = parent.children
        return siblings[siblings.index(node) + 1 :]

    def all_nodes(node: Element, rules: list, include_self: bool = True):
        """Search every node yielded by walking `node`, optionally skipping `node` itself."""
        return first_match(
            (n for n in walk(node) if include_self or n != node),
            rules,
        )

    def all_children(node: Element, rules: list):
        """Search the direct children of the current node."""
        return first_match(visit_children(node), rules)

    def first_sibling(node: Element, rules: list):
        """Check only the node immediately following the current node."""
        following = following_siblings(node)
        if following and following[0].type == "element":
            return branch(following[0], rules)
        return None

    def all_siblings(node: Element, rules: list):
        """Search every sibling after the current node."""
        return first_match(following_siblings(node), rules)

    def branch(node: Element, rules: list):
        """Based on the current rule, recursively check the nodes.
        If on the last rule then return the current valid node.
        """
        if not rules:
            return node

        rule = rules[0]
        if isinstance(rule, dict):
            if not is_equal(rule, node):
                return None
            if len(rules) == 1:
                return node
            if isinstance(rules[1], dict):
                # Descendant relationship: `node` already satisfied the current
                # rule, so only nodes below it may satisfy the next one.
                # Including `node` itself would let `div div` match a lone div.
                return all_nodes(node, rules[1:], False)
            if rules[1] == "*":
                return all_nodes(node, rules[2:], False)
            # The next rule is a combinator (`>`, `+`, `~`); let it pick the candidates.
            return branch(node, rules[1:])
        if rule == "*":
            return all_nodes(node, rules[1:])
        if rule == ">":
            return all_children(node, rules[1:])
        if rule == "+":
            return first_sibling(node, rules[1:])
        if rule == "~":
            return all_siblings(node, rules[1:])
        return None

    return all_nodes(tree, rules)

120 

121 

def queryAll(tree: AST | Root | Element, specifier: str) -> list[Element]:
    """Return every element matching `specifier`, like JavaScript's querySelectorAll.

    `#` indicates an id and `.` indicates a class. If they are used alone they
    match any tag. Any tag can be used by itself or with `#` and/or `.`. You may
    use any number of class specifiers, but only one id specifier per tag name.
    Complex specifiers are allowed: space separated specifiers express nesting
    (ancestor/descendant), `>` a direct child, `+` the immediately following
    sibling and `~` any following sibling.

    Examples:
    * `.some-example` matches all elements with the class `some-example`
    * `#some-example` matches all elements with the id `some-example`
    * `li` matches all `li` elements
    * `li.red` matches all `li` elements with the class `red`
    * `li#red` matches all `li` elements with the id `red`
    * `input[type="checkbox"]` matches all `input` elements with the attribute `type="checkbox"`
    * `div.form-control input[type="checkbox"]` matches all `input` elements with the
    attribute `type="checkbox"` that are nested inside a `div` with the class `form-control`.

    Args:
        tree: The tree to search. When an `AST` is given its `tree` attribute is searched.
        specifier: The selector string.

    Return:
        list[Element]: All elements matching the specifier, each element listed once,
        or an empty list if no elements were found.
    """
    if isinstance(tree, AST):
        tree = tree.tree

    rules = __parse_specifiers(specifier)

    def collect(candidates, rules: list) -> list:
        """Run `branch` over every element candidate and gather all hits."""
        found = []
        for candidate in candidates:
            if candidate.type == "element":
                found.extend(branch(candidate, rules))
        return found

    def following_siblings(node: Element) -> list:
        """Return the nodes that come after `node` in its parent's children."""
        parent = node.parent
        if parent is None:
            return []
        siblings = parent.children
        return siblings[siblings.index(node) + 1 :]

    def all_nodes(node: Element, rules: list, include_self: bool = True) -> list:
        """Search every node yielded by walking `node`, optionally skipping `node` itself."""
        return collect(
            (n for n in walk(node) if include_self or n != node),
            rules,
        )

    def all_children(node: Element, rules: list) -> list:
        """Search the direct children of the current node."""
        return collect(visit_children(node), rules)

    def first_sibling(node: Element, rules: list) -> list:
        """Check only the node immediately following the current node."""
        following = following_siblings(node)
        if following and following[0].type == "element":
            return branch(following[0], rules)
        return []

    def all_siblings(node: Element, rules: list) -> list:
        """Search every sibling after the current node."""
        return collect(following_siblings(node), rules)

    def branch(node: Element, rules: list) -> list:
        """Based on the current rule, recursively check the nodes.
        If on the last rule then return the current valid node.

        Always returns a list (empty when nothing matched) so callers can
        extend or unpack the result without checking for None.
        """
        if not rules:
            return [node]

        rule = rules[0]
        if isinstance(rule, dict):
            if not is_equal(rule, node):
                return []
            if len(rules) == 1:
                return [node]
            if isinstance(rules[1], dict):
                # Descendant relationship: `node` already satisfied the current
                # rule, so only nodes below it may satisfy the next one.
                return all_nodes(node, rules[1:], False)
            if rules[1] == "*":
                return all_nodes(node, rules[2:], False)
            # The next rule is a combinator (`>`, `+`, `~`); let it pick the candidates.
            return branch(node, rules[1:])
        if rule == "*":
            return all_nodes(node, rules[1:])
        if rule == ">":
            return all_children(node, rules[1:])
        if rule == "+":
            return first_sibling(node, rules[1:])
        if rule == "~":
            return all_siblings(node, rules[1:])
        return []

    # The same element can be reached through several matching ancestors
    # (e.g. `div p` with nested divs). Keep the first occurrence of each,
    # comparing by identity so distinct but equal-looking elements all survive.
    unique = []
    seen = set()
    for element in all_nodes(tree, rules):
        if id(element) not in seen:
            seen.add(id(element))
            unique.append(element)
    return unique

225 

226 

def matches(node: Element, specifier: str) -> bool:
    """Works the same as the Javascript matches. `#` indicates an id and `.`
    indicates a class. If they are used alone they match any tag.
    Any tag can be used by itself or with `#` and/or `.`. You may use
    any number of class specifiers, but may only use one id specifier per
    tag name. Complex specifiers are not supported. Everything in the specifier
    must relate to one element/tag.

    Examples:
    * `.some-example` is true for an element with the class `some-example`
    * `#some-example` is true for an element with the id `some-example`
    * `li` is true for an `li` element
    * `li.red` is true for an `li` with the class `red`
    * `li#red` is true for an `li` with the id `red`
    * `input[type="checkbox"]` is true for an `input` with the attribute `type="checkbox"`

    Args:
        node: The element to test.
        specifier: A selector describing a single element.

    Returns:
        bool: Whether `node` satisfies the specifier.

    Raises:
        Exception: If the specifier contains nothing that can be parsed, describes
            more than one element, or is only a combinator such as `>` or `*`.
    """
    rules = __parse_specifiers(specifier)

    # Without this guard an empty or unparsable specifier surfaces as a bare
    # IndexError from `rules[0]` below.
    if not rules:
        raise Exception(f"Specifier is empty or could not be parsed.\n{specifier}")

    if len(rules) > 1:
        raise Exception(f"Complex specifier detected and is not allowed.\n{specifier}")

    rule = rules[0]
    if not isinstance(rule, dict):
        raise Exception(
            "Specifier must only include tag name, classes, id, and or attribute specfiers.\n"
            "Example: `li.red#sample[class^='form-'][title~='sample']`"
        )

    return is_equal(rule, node)

255 

256 

def is_equal(rule: dict, node: Element) -> bool:
    """Checks if a rule is valid on a node.
    A rule is a dictionary of possible values and each value must
    be valid on the node.

    A rule may have a tag, id, classList, and attribute list:
    * If the `tag` is not `*`, the nodes `tag` must match the rules `tag`
    * If the `id` is provided, the nodes `id` must match the rules `id`
    * If the `classList` is not empty, each class in the `classList` must exist in the nodes
    class attribute
    * If the `attributes` list is not empty, each attribute in the list will be compared
    against the nodes attributes given the attributes comparator. Below is the list of
    possible comparisons.
        1. Exists: `[checked]` yields any element that has the attribute `checked` no matter its
        value.
        2. Equals: `[checked='no']` yields any element with `checked='no'`
        3. Contains: `[class~=sample]` or `[class*=sample]` yields any element with a class
        containing `sample`
        4. Equal to or startswith value-: `[class|=sample]` yields elements that either have
        a class that equals `sample` or a class that starts with `sample-`
        5. Starts with: `[class^=sample]` yields elements with a class that starts with `sample`
        6. Ends with: `[class$="sample"]` yields elements with a class that ends with `sample`

    Comparators are accepted both with and without the trailing `=`
    (`"^="` and `"^"` are treated the same).

    Args:
        rule (dict): The rule to apply to the node. Must have the keys `tag`, `id`,
            `classList` and `attributes`.
        node (Element): The node to validate. Its `tag` and `properties` are read.

    Returns:
        bool: Whether the node passes all the rules in the dictionary.
    """
    properties = node.properties

    # Validate tag
    if rule["tag"] != "*" and rule["tag"] != node.tag:
        return False

    # Validate id; `.get` so an element without an id fails to match instead of raising
    if rule["id"] is not None and rule["id"] != properties.get("id"):
        return False

    # Validate class list
    if rule["classList"]:
        if "class" not in properties:
            return False
        node_classes = properties["class"].split()
        if any(klass not in node_classes for klass in rule["classList"]):
            return False

    # Comparator symbol (without the trailing `=`) -> predicate(actual, expected)
    validators = {
        "|": lambda actual, expected: actual == expected or actual.startswith(f"{expected}-"),
        "^": lambda actual, expected: actual.startswith(expected),
        "$": lambda actual, expected: actual.endswith(expected),
        "*": lambda actual, expected: expected in actual,
        "~": lambda actual, expected: expected in actual,
    }

    # Validate all attributes; every one of them has to pass
    for attr in rule["attributes"]:
        name = attr["name"]
        if name not in properties:
            return False

        compare = attr["compare"]
        if compare is None:
            # Existence check only. Keep going so the remaining attributes are
            # still validated instead of reporting success early.
            continue

        if compare == "=":
            if attr["value"] != properties[name]:
                return False
            continue

        # The specifier parser yields "^=", "|=", ...; strip the "=" so both
        # that form and the bare symbol select the same validator.
        validator = validators.get(compare.rstrip("="))
        if validator is not None and not is_valid_attr(
            attr=properties[name],
            sub=attr["value"],
            validator=validator,
            name=name,
        ):
            return False

    return True


def is_valid_attr(attr: str, sub: str, validator: Callable, name: str | None = None) -> bool:
    """Validate an attribute value with a given string and a validator callable.
    If `name` is a list attribute (currently only `class`), the attribute value is
    split on whitespace and each item is checked on its own. Otherwise the whole
    attribute value is the only item checked.

    Args:
        attr (str): The attribute value found on the node.
        sub (str): The value from the specifier to compare against.
        validator (Callable): Called as `validator(item, sub)`; a truthy result means
            the item is valid.
        name (str | None): The attribute's name. When omitted the value is never split.

    Returns:
        True if at least one item is valid, otherwise False.
    """
    list_attributes = ["class"]

    compare_values = attr.split() if name in list_attributes else [attr]

    return any(validator(item, sub) for item in compare_values)

363 

364 

def __parse_specifiers(specifier: str) -> list:
    """Split a selector string into an ordered list of rules.

    Rules:
    * `*` = any element
    * `>` = Everything with certain parent child relationship
    * `+` = first sibling
    * `~` = All after
    * `.` = class
    * `#` = id
    * `[attribute]` = all elements with attribute
    * `[attribute=value]` = all elements with attribute=value
    * `[attribute~=value]` = all elements with attribute containing value
    * `[attribute|=value]` = all elements with attribute=value or attribute starting with value-
    * `node[attribute^=value]` = all elements with attribute starting with value
    * `node[attribute$=value]` = all elements with attribute ending with value
    * `node[attribute*=value]` = all elements with attribute containing value

    Args:
        specifier (str): The selector to parse. Text that matches none of the
            patterns (for example whitespace) is skipped.

    Returns:
        list: One entry per token, in order. Combinators are the strings `*`, `>`,
        `+` and `~`. Every other token is a dict with the keys `tag` (`"*"` when no
        tag was given), `classList`, `id` and `attributes`, where each attribute is
        a dict with `name`, `compare` (e.g. `"="`, `"^="` or None) and `value`
        (surrounding quotes removed, or None).

    Raises:
        Exception: If one element specifier contains more than one id.
    """
    # Imported as a module so the builtin `compile` is not shadowed.
    import re

    splitter = re.compile(
        r"([~>\*+])|(([.#]?[a-zA-Z0-9_-]+)+((\[[^\[\]]+\]))*)|(\[[^\[\]]+\])+"
    )
    el_from_class_from_id = re.compile(r"(#|\.)?([a-zA-Z0-9_-]+)")
    # The unquoted value must not contain brackets, otherwise `[a][b=c]` is read
    # as a single attribute `a` with the value `][b=c`.
    attr_compare_val = re.compile(
        r"\[([a-zA-Z0-9_-]+)([~|^$*]?=)?(\"[^\"]*\"|'[^']*'|[^'\"\[\]]+)?\]"
    )

    def unquote(value):
        """Remove one pair of matching surrounding quotes, if present."""
        if (
            value is not None
            and len(value) >= 2
            and value[0] == value[-1]
            and value[0] in "'\""
        ):
            return value[1:-1]
        return value

    def parse_attributes(text: str) -> list:
        """Turn a run of `[name<op>value]` groups into attribute dicts."""
        return [
            {
                "name": found.group(1),
                "compare": found.group(2),
                "value": unquote(found.group(3)),
            }
            for found in attr_compare_val.finditer(text)
        ]

    tokens = []
    for token in splitter.finditer(specifier):
        text = token.group()
        if text in ["*", ">", "+", "~"]:
            tokens.append(text)
            continue

        # Everything before the first `[` is the tag/class/id part (empty for
        # attribute-only tokens); the rest is the attribute part.
        el_class_id, bracket, rest = text.partition("[")

        element = {
            "tag": "*",
            "classList": [],
            "id": None,
            "attributes": parse_attributes(bracket + rest),
        }

        for item in el_from_class_from_id.finditer(el_class_id):
            prefix, name = item.group(1), item.group(2)
            if prefix == ".":
                if name not in element["classList"]:
                    element["classList"].append(name)
            elif prefix == "#":
                if element["id"] is not None:
                    raise Exception(
                        f"There may only be one id per element specifier.\n{token.group()}"
                    )
                element["id"] = name
            else:
                element["tag"] = name or "*"

        tokens.append(element)

    return tokens