Coverage for phml\utils\transform\sanitize\clean.py: 3%

75 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2022-11-30 09:38 -0600

1from typing import Optional 

2 

3from phml.nodes import AST, Element, Root 

4 

5from .schema import Schema 

6 

7 

def sanatize(tree: AST | Root | Element, schema: Optional[Schema] = None):
    """Sanitize elements and attributes in the phml tree.

    Should be used when handling data from an unknown source. It should be run on
    an AST that has already been compiled to html so that no unknown values are
    left unchecked.

    By default the sanitization schema uses the github schema and follows the hast
    sanitize utility.

    * [github schema](https://github.com/syntax-tree/hast-util-sanitize/blob/main/lib/schema.js)
    * [hast sanitize](https://github.com/syntax-tree/hast-util-sanitize)

    The passes run in this order: strip listed tags, drop elements whose tag is
    not allowed, drop elements whose parent is not an allowed one, drop
    disallowed attributes, then add missing required attributes.

    Note:
        This utility will edit the tree in place.

    Args:
        tree (AST | Root | Element): The root of the tree that will be sanitized.
        schema (Optional[Schema], optional): User defined schema. When omitted or
            `None` a new `Schema()` is created for the call.
    """

    # Imported lazily, as in the original implementation.
    from phml.utils import is_element, remove_nodes, test

    if schema is None:
        # Built per call so one default instance is never shared between calls.
        schema = Schema()

    src = tree.tree if isinstance(tree, AST) else tree

    for tag in schema.strip:
        remove_nodes(src, ["element", {"tag": tag}])

    def check_protocols(value, protocols):
        """Return True if `value` is a string starting with `<protocol>:`."""
        if not isinstance(value, str):
            # Non-string values (e.g. boolean attributes) carry no protocol.
            return False
        return any(value.startswith(f"{protocol}:") for protocol in protocols)

    def allowed_values_by_attribute(tag):
        """Map each attribute allowed on `tag` to its permitted values.

        A plain string schema entry allows any value (stored as `None`); a list
        entry `[name, value, ...]` restricts the attribute to the listed values.
        """
        rules = {}
        for entry in schema.attributes[tag]:
            if isinstance(entry, str):
                rules[entry] = None
            elif isinstance(entry, list) and entry:
                name = entry[0]
                if name not in rules:
                    rules[name] = list(entry[1:])
                elif rules[name] is not None:
                    rules[name].extend(entry[1:])
        return rules

    def recurse_check_tag(node):
        """Remove child elements whose tag is not in `schema.tagNames`."""
        rejected = []
        for child in node.children:
            if not test(child, "element"):
                continue
            if is_element(child, schema.tagNames):
                recurse_check_tag(child)
            else:
                rejected.append(child)

        # Removal is deferred so the list is not mutated while iterating it.
        for el in rejected:
            node.children.remove(el)

    def recurse_check_ancestor(node):
        """Remove child elements whose parent tag is not listed in `schema.ancestors`."""
        # NOTE(review): only the direct parent is checked; hast-util-sanitize
        # accepts a match anywhere in the ancestor chain - confirm intent.
        rejected = []
        for child in node.children:
            if not test(child, "element"):
                continue
            if child.tag in schema.ancestors:
                # The parent may have no `tag` (presumably a Root); treat that
                # as "no matching parent" instead of raising AttributeError.
                parent_tag = getattr(child.parent, "tag", None)
                if parent_tag not in schema.ancestors[child.tag]:
                    rejected.append(child)
                    continue
            recurse_check_ancestor(child)

        for el in rejected:
            node.children.remove(el)

    def recurse_check_attributes(node):
        """Drop attributes that the schema does not allow on each element."""
        # NOTE(review): elements whose tag has no entry in `schema.attributes`
        # keep all of their properties, exactly as before - confirm intent.
        for child in node.children:
            if not test(child, "element"):
                continue

            if child.tag in schema.attributes:
                rules = allowed_values_by_attribute(child.tag)
                rejected = []
                for attr in child.properties:
                    value = child.properties[attr]
                    if attr not in rules:
                        rejected.append(attr)
                    elif rules[attr] is not None and value not in rules[attr]:
                        rejected.append(attr)
                    elif attr in schema.protocols and not check_protocols(
                        value, schema.protocols[attr]
                    ):
                        rejected.append(attr)

                for attr in rejected:
                    child.properties.pop(attr, None)

            # Always descend, so nested elements are checked as well.
            recurse_check_attributes(child)

    def recurse_check_required(node):
        """Add required attributes, with their schema values, where missing."""
        for child in node.children:
            if not test(child, "element"):
                continue

            if child.tag in schema.required:
                for attr, value in schema.required[child.tag].items():
                    if attr not in child.properties:
                        child.properties[attr] = value

            # Always descend, so nested elements are checked as well.
            recurse_check_required(child)

    recurse_check_tag(src)
    recurse_check_ancestor(src)
    recurse_check_attributes(src)
    recurse_check_required(src)

116 

117 

if __name__ == "__main__":
    # Manual smoke run: build a small tree and sanitize it in place with the
    # default schema. Nothing is printed or asserted.
    from phml.builder import p
    from phml.utils import inspect  # NOTE(review): imported but not used below

    demo_children = [
        p("input", {"disabled": True, "width": "100px"}),
        p("custom"),
        p("script", "h1{color:blue;}"),
        p("li", "li without ol or ul"),
        p("blockquote", {"cite": "mailto:zboehm104@gmail.com"}),
    ]
    demo_root = p("div", *demo_children)

    sanatize(demo_root)