Coverage for phml\utilities\transform\sanitize\clean.py: 100%

63 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-04-12 14:26 -0500

1from __future__ import annotations 

2 

3from re import match 

4 

5from phml.nodes import Element, Node, Parent 

6 

7from .schema import Schema 

8 

9 

def recurse_check_tag(node: Parent, schema: Schema):
    """Drop child elements whose tag is not accepted by the schema.

    Every child of `node` that is an `Element` is tested with `is_element`
    against `schema.tag_names`; elements that fail the test are removed from
    `node`. Any child that was kept and is a `Parent` is processed the same
    way, recursively. The tree is modified in place.

    Args:
        node (Parent): The node whose children are checked.
        schema (Schema): Schema providing the accepted `tag_names`.
    """
    # Imported inside the function, as in the rest of this module
    # (presumably to avoid a circular import -- TODO confirm).
    from phml.utilities import is_element

    # Iterate over a snapshot: children may be removed while walking.
    for candidate in tuple(node):
        rejected = isinstance(candidate, Element) and not is_element(
            candidate, schema.tag_names
        )
        if rejected:
            node.remove(candidate)
            continue

        if isinstance(candidate, Parent):
            recurse_check_tag(candidate, schema)

18 

19 

def recurse_check_ancestor(node: Parent, schema: Schema):
    """Remove elements that are not placed inside one of their required parents.

    For each child element whose tag has an entry in `schema.ancestors`, the
    direct parent must be an `Element` whose tag is listed in that entry;
    otherwise the child is removed from `node`. Every element that is kept is
    then checked recursively. The tree is modified in place.

    Args:
        node (Parent): The node whose children are checked.
        schema (Schema): Schema providing the `ancestors` mapping.
    """
    # Iterate over a snapshot: children may be removed while walking.
    for candidate in tuple(node):
        if not isinstance(candidate, Element):
            continue

        if candidate.tag in schema.ancestors:
            parent = candidate.parent
            properly_nested = (
                isinstance(parent, Element)
                and parent.tag in schema.ancestors[candidate.tag]
            )
            if not properly_nested:
                node.remove(candidate)
                continue

        recurse_check_ancestor(candidate, schema)

33 

34 

def build_remove_attr_list(
    properties: dict,
    attributes: dict[str, tuple[str | bool, ...]],
    valid_attributes: list,
    schema: Schema,
):
    """Build the list of attributes to remove from a dict of attributes.

    An attribute is scheduled for removal when any of the following holds:

    * its name is not in `valid_attributes`;
    * its value is a string, `schema.protocols` has an entry for its name and
      `check_protocols` rejects the value;
    * `attributes` restricts its name to a tuple of allowed values and the
      current value is not one of them.

    Args:
        properties (dict): The attributes currently set on an element.
        attributes (dict): Attribute name mapped to the tuple of values that
            attribute is allowed to have.
        valid_attributes (list): Names of all attributes that may be kept.
        schema (Schema): Schema providing the `protocols` mapping.

    Returns:
        list: Names of the attributes to remove, in the iteration order of
        `properties`.
    """
    result = []
    for attribute in properties:
        value = properties[attribute]

        if attribute not in valid_attributes:
            result.append(attribute)
        elif (
            isinstance(value, str)
            and attribute in schema.protocols
            and not check_protocols(value, schema.protocols[attribute], schema)
        ):
            # The protocol check applies whether or not the attribute also
            # has a restricted set of values.
            result.append(attribute)
        elif attribute in attributes and value not in attributes[attribute]:
            # `attributes[attribute]` is a tuple of allowed values, so this
            # must be a membership test; comparing the value to the tuple
            # itself would never be equal and would always drop the attribute.
            result.append(attribute)
    return result

66 

67 

def recurse_check_attributes(node: Node, schema: Schema):
    """Remove attributes the schema does not allow, throughout the subtree.

    For an `Element`, the applicable rules are the entries of
    `schema.attributes` for the element's tag (if any) followed by the
    wildcard `"*"` entries. A rule is either a plain attribute name or a tuple
    whose first item is the name and whose remaining items are the allowed
    values. The rules are handed to `build_remove_attr_list` and every
    attribute it returns is popped from the element. Children of any `Parent`
    are then processed recursively. The tree is modified in place.

    Args:
        node (Node): The node to clean, together with its descendants.
        schema (Schema): Schema providing the `attributes` rules.
    """
    if isinstance(node, Element):
        # Wildcard rules always apply; tag specific rules are placed first.
        rules = schema.attributes.get("*", [])
        if node.tag in schema.attributes:
            rules = schema.attributes[node.tag] + rules

        allowed_values = {}
        allowed_names = []
        for rule in rules:
            if isinstance(rule, tuple):
                # Later rules for the same name override earlier ones.
                allowed_values[str(rule[0])] = rule[1:]
            allowed_names.append(rule if isinstance(rule, str) else rule[0])

        rejected = build_remove_attr_list(
            node.attributes, allowed_values, allowed_names, schema
        )
        for name in rejected:
            node.pop(name, None)

    if isinstance(node, Parent):
        for child in node:
            recurse_check_attributes(child, schema)

109 

110 

def recurse_check_required(node: Parent, schema: Schema):
    """Enforce the schema's required attributes on every element in the tree.

    For each child element whose tag has an entry in `schema.required`, every
    required attribute is applied:

    * a missing attribute is set to the required value as is;
    * an existing attribute with a boolean requirement is overwritten with the
      lowercase string form of that boolean (`"true"` / `"false"`);
    * an existing attribute with a string requirement is overwritten when its
      value differs.

    All child elements are then visited recursively, including the children of
    elements that have required attributes themselves, so nested elements are
    not skipped. The tree is modified in place.

    Args:
        node (Parent): The node whose descendants are checked.
        schema (Schema): Schema providing the `required` mapping.
    """
    for child in node:
        if not isinstance(child, Element):
            continue

        if child.tag in schema.required:
            for attr, value in schema.required[child.tag].items():
                if attr not in child.attributes:
                    # NOTE(review): a missing boolean attribute receives the
                    # raw bool, while an existing one is set to a string below.
                    child[attr] = value
                elif isinstance(value, bool):
                    child[attr] = str(value).lower()
                elif isinstance(value, str) and child[attr] != value:
                    child[attr] = value

        # Always descend: an element with required attributes may still
        # contain other elements that need theirs.
        recurse_check_required(child, schema)

123 

124 

def check_protocols(value: str, protocols: list[str], schema: Schema):
    """Return whether `value` starts with one of the allowed protocols.

    A value is accepted only when it begins with a protocol name followed
    directly by a colon, e.g. `"https:"`. Protocol names are compared
    literally and case-sensitively. An empty `protocols` list accepts nothing.

    Args:
        value (str): The attribute value to check.
        protocols (list[str]): Allowed protocol names, without the colon.
        schema (Schema): Unused; kept so existing callers keep working.

    Returns:
        bool: True if `value` starts with `"<protocol>:"` for any protocol.
    """
    # A prefix test requires the colon for every protocol. An ungrouped regex
    # alternation such as "http|https:.*" only binds the colon to the last
    # alternative, so "httpx://..." would be accepted for protocol "http".
    return value.startswith(tuple(f"{protocol}:" for protocol in protocols))

127 

128 

def recurse_strip(node, schema: Schema):
    """Remove every element matched by the schema's `strip` list.

    Each child of `node` that is an `Element` and that `is_element` matches
    against `schema.strip` is removed from `node`. Any child that was kept and
    is a `Parent` is processed the same way, recursively. The tree is modified
    in place.

    Args:
        node: The node whose children are checked.
        schema (Schema): Schema providing the `strip` conditions.
    """
    # Imported inside the function, as in the rest of this module
    # (presumably to avoid a circular import -- TODO confirm).
    from phml.utilities import is_element

    # Iterate over a snapshot: children may be removed while walking.
    for candidate in tuple(node):
        stripped = isinstance(candidate, Element) and is_element(
            candidate, schema.strip
        )
        if stripped:
            node.remove(candidate)
            continue

        if isinstance(candidate, Parent):
            recurse_strip(candidate, schema)

137 

138 

def sanatize(tree: Parent, schema: Schema = Schema()):
    """Sanitize the elements and attributes of a phml tree.

    Use this when the tree is built from data of an unknown source. It should
    be applied to an AST that has already been compiled to html, so that no
    unknown values are left unchecked.

    By default the sanitization schema is the github schema and the behaviour
    follows the hast sanitize utility.

    * [github schema](https://github.com/syntax-tree/hast-util-sanitize/blob/main/lib/schema.js)
    * [hast sanatize](https://github.com/syntax-tree/hast-util-sanitize)

    Note:
        This utility will edit the tree in place.

    Args:
        tree (Parent): The root of the tree that will be sanitized.
        schema (Schema, optional): User defined schema. Defaults to `Schema()`,
            created once when the function is defined.
    """

    from phml.utilities import remove_nodes  # pylint: disable=import-outside-toplevel

    # First drop every element whose tag is listed in `schema.strip`.
    for tag_name in schema.strip:
        remove_nodes(tree, ["element", {"tag": tag_name}])

    # Then run the remaining passes; the order is significant and fixed.
    passes = (
        recurse_check_tag,
        recurse_strip,
        recurse_check_ancestor,
        recurse_check_attributes,
        recurse_check_required,
    )
    for sanitize_pass in passes:
        sanitize_pass(tree, schema)