Coverage for intelligence_toolkit/anonymize_case_data/visuals.py: 100%

66 statements  

« prev     ^ index     » next       coverage.py v7.10.7, created at 2025-10-16 13:41 -0300

1# Copyright (c) 2024 Microsoft Corporation. All rights reserved. 

2# Licensed under the MIT license. See LICENSE file in the project. 

3# 

4import colorsys 

5from collections import defaultdict 

6import numpy as np 

7import plotly.express as px 

8import plotly.graph_objs as go 

9 

# Maps a palette name to the color sequence of the same name in
# px.colors.qualitative. Entries keep the order in which the names are
# listed below.
color_schemes = {
    palette_name: getattr(px.colors.qualitative, palette_name)
    for palette_name in (
        "Plotly",
        "D3",
        "G10",
        "T10",
        "Alphabet",
        "Dark24",
        "Light24",
        "Set1",
        "Pastel1",
        "Dark2",
        "Set2",
        "Pastel2",
        "Set3",
        "Antique",
        "Bold",
        "Pastel",
        "Prism",
        "Safe",
        "Vivid",
    )
}

31 

def get_bar_chart(selection, show_attributes, unit, chart_df, width, height, scheme):
    """Build a vertical bar chart of counts per attribute value.

    Args:
        selection: Sized collection of selection entries. When non-empty it is
            rendered with ``print_selections(selection, multiline=False)`` and
            appended to the title.
        show_attributes: Attribute names listed in the title, joined by ", ".
            May be empty.
        unit: Name of what is being counted. Title-cased for the chart title
            and the y-axis label; may be "".
        chart_df: Table with "Attribute Value", "Count" and "Attribute"
            columns (presumably a pandas DataFrame -- it must support
            ``.copy(deep=True)``). A deep copy is plotted.
        width: Figure width passed to ``px.bar``.
        height: Figure height passed to ``px.bar``.
        scheme: Color sequence passed as ``color_discrete_sequence``.

    Returns:
        The figure produced by ``px.bar`` with axis titles adjusted.
    """
    # Join only the non-empty fragments so that an empty attribute list or an
    # empty unit cannot leave a doubled space in the title.
    title_fragments = (
        "Top",
        ", ".join(show_attributes),
        "attributes across all",
        unit.title(),
        "records",
    )
    title = " ".join(fragment for fragment in title_fragments if fragment)
    if len(selection) > 0:
        title += ' matching "' + print_selections(selection, multiline=False) + '"'

    chart_df = chart_df.copy(deep=True)
    fig = px.bar(
        chart_df,
        x="Attribute Value",
        y="Count",
        color="Attribute",
        orientation="v",
        text_auto=True,
        title=title,
        width=width,
        height=height,
        color_discrete_sequence=scheme,
    )
    fig.update_xaxes(title_text="")
    # NOTE(review): "categoryorder" is applied to the y axis, which carries the
    # counts; if the intent is to order the bars it may belong on the x axis --
    # confirm before changing, since it would alter the rendered chart.
    fig.update_layout(
        yaxis={
            "categoryorder": "total descending",
            "title_text": f"{unit.title()} Count" if unit != "" else "Count",
        }
    )
    return fig

63 

def get_line_chart(
    selection, series_attributes, unit, chart_df, time_attribute, width, height, scheme
):
    """Build a line chart of counts over time, one line per attribute value.

    Args:
        selection: Sized collection of selection entries. When non-empty it is
            rendered with ``print_selections(selection, multiline=False)`` and
            appended to the title.
        series_attributes: Accepted for interface compatibility; not used by
            this function.
        unit: Name of what is being counted. Title-cased for the chart title
            and the y-axis label; may be "".
        chart_df: Table with "Count" and "Attribute Value" columns plus the
            column named by ``time_attribute`` (presumably a pandas DataFrame
            -- it must support ``.copy(deep=True)``). A deep copy is plotted.
        time_attribute: Name of the column used for the x axis.
        width: Figure width passed to ``px.line``.
        height: Figure height passed to ``px.line``.
        scheme: Color sequence passed as ``color_discrete_sequence``.

    Returns:
        The figure produced by ``px.line`` with the y-axis title set.
    """
    # Join only the non-empty fragments so that an empty unit cannot leave a
    # doubled space in the title.
    title_fragments = ("Time series across all", unit.title(), "records")
    title = " ".join(fragment for fragment in title_fragments if fragment)
    if len(selection) > 0:
        title += ' matching "' + print_selections(selection, multiline=False) + '"'

    chart_df = chart_df.copy(deep=True)
    fig = px.line(
        chart_df,
        x=time_attribute,
        y="Count",
        color="Attribute Value",
        orientation="v",
        title=title,
        width=width,
        height=height,
        color_discrete_sequence=scheme,
    )
    fig.update_layout(
        yaxis={"title_text": f"{unit.title()} Count" if unit != "" else "Count"}
    )
    return fig

92 

def hsl_to_hex(h, s, l):
    """Convert an HSL color to a lowercase "#rrggbb" string.

    Args:
        h: Hue; divided by 360 before conversion.
        s: Saturation; divided by 100 before conversion.
        l: Lightness; divided by 100 before conversion.

    Returns:
        The color as "#" followed by three two-digit hexadecimal channels.
    """
    # colorsys orders its arguments hue, lightness, saturation.
    red, green, blue = colorsys.hls_to_rgb(h / 360, l / 100, s / 100)
    # int() truncates, so each scaled channel is rounded toward zero.
    channels = (int(red * 255), int(green * 255), int(blue * 255))
    return "#{:02x}{:02x}{:02x}".format(*channels)

98 

99 

def hex_to_rgb(hex):
    """Parse a six-digit hexadecimal color into an (r, g, b) tuple of ints.

    Any leading "#" characters are removed first; the next three pairs of
    characters are then read as base-16 integers.
    """
    digits = hex.lstrip("#")
    red = int(digits[0:2], 16)
    green = int(digits[2:4], 16)
    blue = int(digits[4:6], 16)
    return (red, green, blue)

104 

105 

def color_to_hsl(color):
    """Convert a color string to a list of integer [hue, saturation, lightness].

    Args:
        color: Either a string starting with "rgb" whose comma-separated
            integer channels sit between its fifth character and its final
            character (e.g. "rgb(1, 2, 3)"), or a hex string accepted by
            ``hex_to_rgb``.

    Returns:
        ``[h, s, l]`` with the hue scaled by 360 and saturation and lightness
        scaled by 100, each truncated to an int.
    """
    if not color.startswith("rgb"):
        channels = hex_to_rgb(color)
    else:
        # Drop the 4-character prefix and the closing character, then parse
        # each comma-separated piece (int() tolerates surrounding spaces).
        inner = color[4:-1]
        channels = tuple(map(int, inner.split(",")))
    # colorsys returns hue, lightness, saturation in that order.
    hue, lightness, saturation = colorsys.rgb_to_hls(
        channels[0] / 255, channels[1] / 255, channels[2] / 255
    )
    return [int(hue * 360), int(saturation * 100), int(lightness * 100)]

113 

114 

def get_flow_chart(
    links_df,
    selection,
    source_attribute,
    target_attribute,
    highlight_attribute,
    width,
    height,
    unit,
    scheme,
):
    """Build a Sankey diagram of source-to-target links.

    Args:
        links_df: Table with "Source", "Target", "Count", "Proportion" and
            "Highlight" columns (presumably a pandas DataFrame; "Source" and
            "Target" are assumed to hold strings, since a space is appended to
            target values).
        selection: Sized collection of selection entries. When non-empty it is
            rendered with ``print_selections(selection, multiline=False)`` and
            appended to the title.
        source_attribute: Name shown for the source side in the title and the
            hover text.
        target_attribute: Name shown for the target side in the title and the
            hover text.
        highlight_attribute: Name shown in the title suffix and hover text;
            "" omits the title suffix.
        width: Figure width.
        height: Figure height.
        unit: Name of what is being counted; title-cased in the title, may
            be "".
        scheme: Color sequence; its first entry colors the nodes and supplies
            the hue of the link colors.

    Returns:
        A ``go.Figure`` holding a single ``go.Sankey`` trace.
    """
    # Join only the non-empty fragments so that an empty unit cannot leave a
    # doubled space in the title.
    title_fragments = (
        f"{source_attribute}\u2014{target_attribute}",
        "links for all",
        unit.title(),
        "records",
    )
    title = " ".join(fragment for fragment in title_fragments if fragment)
    if len(selection) > 0:
        title += ' matching "' + print_selections(selection, multiline=False) + '"'

    highlight_label = highlight_attribute
    if highlight_attribute != "":
        title += f",<br>colored by proportion with {highlight_attribute}"
        highlight_label = highlight_attribute + " "

    # Target labels get a trailing space so they stay distinct from
    # identically named source labels in the node list.
    nodes = sorted(
        set(
            links_df["Source"].unique().tolist()
            + [x + " " for x in links_df["Target"].unique().tolist()]
        )
    )
    # Labels are unique, so a dict gives the same position as nodes.index()
    # without rescanning the list for every link.
    node_position = {label: position for position, label in enumerate(nodes)}

    default_color = scheme[0]
    # Only the hue of the default color is reused for the links.
    hue = color_to_hsl(default_color.lstrip("#"))[0]

    hover_template = (
        source_attribute
        + ": %{source.label} + "
        + target_attribute
        + ": %{target.label} = %{value:.0f}<br>+ "
        + highlight_label
        + " = %{customdata[0]}<br>Proportion = %{customdata[1]:.1%}<extra></extra>"
    )

    fig = go.Figure(
        data=[
            go.Sankey(
                node=dict(
                    pad=15,
                    thickness=20,
                    line=dict(color="black", width=0.5),
                    label=nodes,
                    color=default_color,
                ),
                link=dict(
                    # Indices refer to positions in the node label list.
                    source=[node_position[x] for x in links_df["Source"].tolist()],
                    target=[
                        node_position[x + " "] for x in links_df["Target"].tolist()
                    ],
                    value=links_df["Count"].tolist(),
                    # Link saturation follows the proportion; lightness is
                    # fixed at 70.
                    color=[
                        hsl_to_hex(int(hue), int(p * 100), 70)
                        for p in links_df["Proportion"].tolist()
                    ],
                    hovertemplate=hover_template,
                    # One (highlight, proportion) pair per link for the hover
                    # template's customdata references.
                    customdata=np.stack(
                        (
                            links_df["Highlight"].tolist(),
                            links_df["Proportion"].tolist(),
                        ),
                        axis=-1,
                    ),
                ),
            )
        ]
    )

    fig.update_layout(font_size=14, width=width, height=height, title_text=title)

    return fig

195 

196 

def print_selections(selection, multiline=True):
    """Format selected attribute values as text, grouped by attribute.

    Args:
        selection: Iterable of mappings, each with an "attribute" key and a
            "value" key.
        multiline: When true, emit one "- attribute = v1 | v2" line per
            attribute, each ending in a newline. Otherwise emit a single line
            of "attribute:v1|v2" groups joined by ", ".

    Returns:
        The formatted text; "" when ``selection`` is empty.
    """
    grouped = defaultdict(list)
    for entry in selection:
        grouped[entry["attribute"]].append(entry["value"])

    # Attributes and their values are both emitted in sorted order, so the
    # text does not depend on the order of the entries in `selection`.
    ordered = [(key, sorted(grouped[key])) for key in sorted(grouped)]

    # Values are stringified at join time so non-string values do not raise.
    if multiline:
        return "".join(
            f"- {key} = " + " | ".join(map(str, values)) + "\n"
            for key, values in ordered
        )
    return ", ".join(
        f"{key}:" + "|".join(map(str, values)) for key, values in ordered
    )