Coverage for intelligence_toolkit/anonymize_case_data/visuals.py: 100%
66 statements
« prev ^ index » next coverage.py v7.10.7, created at 2025-10-16 13:41 -0300
« prev ^ index » next coverage.py v7.10.7, created at 2025-10-16 13:41 -0300
1# Copyright (c) 2024 Microsoft Corporation. All rights reserved.
2# Licensed under the MIT license. See LICENSE file in the project.
3#
4import colorsys
5from collections import defaultdict
6import numpy as np
7import plotly.express as px
8import plotly.graph_objs as go
# Lookup table from palette name to the Plotly Express qualitative color
# sequence of the same name. Keys keep the order listed below.
# NOTE(review): presumably the values are what callers pass as the ``scheme``
# argument of the chart builders in this module - confirm against callers.
color_schemes = {
    palette_name: getattr(px.colors.qualitative, palette_name)
    for palette_name in (
        "Plotly",
        "D3",
        "G10",
        "T10",
        "Alphabet",
        "Dark24",
        "Light24",
        "Set1",
        "Pastel1",
        "Dark2",
        "Set2",
        "Pastel2",
        "Set3",
        "Antique",
        "Bold",
        "Pastel",
        "Prism",
        "Safe",
        "Vivid",
    )
}
def get_bar_chart(selection, show_attributes, unit, chart_df, width, height, scheme):
    """Build a vertical bar chart of counts per attribute value.

    Args:
        selection: Sequence of selection dicts handed to ``print_selections``
            (which reads their ``"attribute"`` and ``"value"`` keys). When
            non-empty, a summary of it is appended to the chart title.
        show_attributes: Attribute names listed in the title; may be empty.
        unit: Label for the counted records. It is title-cased for the chart
            title and the y-axis title; may be ``""``.
        chart_df: Data frame supplying the ``"Attribute Value"``, ``"Count"``
            and ``"Attribute"`` columns. A deep copy is plotted.
        width: Figure width forwarded to ``px.bar``.
        height: Figure height forwarded to ``px.bar``.
        scheme: Color sequence forwarded as ``color_discrete_sequence``.

    Returns:
        The figure created by ``px.bar`` with the axis titles adjusted.
    """
    # Join only the non-empty pieces so that an empty attribute list or an
    # empty unit does not leave a doubled space in the title.
    title_parts = (
        "Top",
        ", ".join(show_attributes),
        "attributes across all",
        unit.title(),
        "records",
    )
    title = " ".join(part for part in title_parts if part)
    if len(selection) > 0:
        title += ' matching "' + print_selections(selection, multiline=False) + '"'

    # Plot a private copy so the caller's frame is never touched.
    chart_df = chart_df.copy(deep=True)
    fig = px.bar(
        chart_df,
        x="Attribute Value",
        y="Count",
        color="Attribute",
        orientation="v",
        text_auto=True,
        title=title,
        width=width,
        height=height,
        color_discrete_sequence=scheme,
    )
    fig.update_xaxes(title_text="")
    # NOTE(review): "categoryorder" is set on the y axis, which carries the
    # "Count" column here; confirm whether the x axis was intended.
    fig.update_layout(
        yaxis={
            "categoryorder": "total descending",
            "title_text": f"{unit.title()} Count" if unit != "" else "Count",
        }
    )
    return fig
def get_line_chart(
    selection, series_attributes, unit, chart_df, time_attribute, width, height, scheme
):
    """Build a line chart of counts over time, one line per attribute value.

    Args:
        selection: Sequence of selection dicts handed to ``print_selections``
            (which reads their ``"attribute"`` and ``"value"`` keys). When
            non-empty, a summary of it is appended to the chart title.
        series_attributes: Accepted for interface compatibility; not used by
            this function.
        unit: Label for the counted records. It is title-cased for the chart
            title and the y-axis title; may be ``""``.
        chart_df: Data frame supplying the ``time_attribute``, ``"Count"`` and
            ``"Attribute Value"`` columns. A deep copy is plotted.
        time_attribute: Name of the column plotted on the x axis.
        width: Figure width forwarded to ``px.line``.
        height: Figure height forwarded to ``px.line``.
        scheme: Color sequence forwarded as ``color_discrete_sequence``.

    Returns:
        The figure created by ``px.line`` with the y-axis title adjusted.
    """
    # Join only the non-empty pieces so that an empty unit does not leave a
    # doubled space in the title.
    title_parts = ("Time series across all", unit.title(), "records")
    title = " ".join(part for part in title_parts if part)
    if len(selection) > 0:
        title += ' matching "' + print_selections(selection, multiline=False) + '"'

    # Plot a private copy so the caller's frame is never touched.
    chart_df = chart_df.copy(deep=True)
    fig = px.line(
        chart_df,
        x=time_attribute,
        y="Count",
        color="Attribute Value",
        orientation="v",
        title=title,
        width=width,
        height=height,
        color_discrete_sequence=scheme,
    )
    fig.update_layout(
        yaxis={"title_text": f"{unit.title()} Count" if unit != "" else "Count"}
    )
    return fig
def hsl_to_hex(h, s, l):
    """Convert an HSL color to a ``#rrggbb`` hex string.

    Args:
        h: Hue, divided by 360 before conversion (i.e. given in degrees).
        s: Saturation, divided by 100 before conversion (i.e. a percentage).
        l: Lightness, divided by 100 before conversion (i.e. a percentage).

    Returns:
        The color as a lowercase hex string with a leading ``#``. Each
        channel is scaled by 255 and truncated with ``int``.
    """
    # colorsys takes its arguments in H, L, S order.
    red, green, blue = colorsys.hls_to_rgb(h / 360, l / 100, s / 100)
    channels = (int(red * 255), int(green * 255), int(blue * 255))
    return "#" + "".join("%02x" % channel for channel in channels)
def hex_to_rgb(hex):
    """Parse a six-digit hex color into an ``(r, g, b)`` tuple of ints.

    Args:
        hex: Hex color string; any leading ``#`` characters are ignored.

    Returns:
        A 3-tuple with the red, green and blue channels, each parsed from
        two hex digits.
    """
    digits = hex.lstrip("#")
    red = int(digits[0:2], 16)
    green = int(digits[2:4], 16)
    blue = int(digits[4:6], 16)
    return (red, green, blue)
def color_to_hsl(color):
    """Convert a color string to integer ``[hue, saturation, lightness]``.

    Args:
        color: Either a string starting with ``"rgb"`` whose comma-separated
            integer channels sit between the fifth character and the last
            one (e.g. ``"rgb(1,2,3)"``), or a hex color accepted by
            ``hex_to_rgb``.

    Returns:
        A list ``[h, s, l]`` where hue is scaled by 360 and saturation and
        lightness by 100, each truncated with ``int``.
    """
    if not color.startswith("rgb"):
        red, green, blue = hex_to_rgb(color)[:3]
    else:
        # Drop the leading "rgb(" and the closing ")" before splitting.
        channels = tuple([int(part) for part in color[4:-1].split(",")])
        red, green, blue = channels[0], channels[1], channels[2]
    # colorsys returns the components in H, L, S order.
    hue, lightness, saturation = colorsys.rgb_to_hls(red / 255, green / 255, blue / 255)
    return [int(hue * 360), int(saturation * 100), int(lightness * 100)]
def get_flow_chart(
    links_df,
    selection,
    source_attribute,
    target_attribute,
    highlight_attribute,
    width,
    height,
    unit,
    scheme,
):
    """Build a Sankey diagram of source-to-target links.

    Args:
        links_df: Data frame supplying the ``"Source"``, ``"Target"``,
            ``"Count"``, ``"Proportion"`` and ``"Highlight"`` columns, one
            row per link.
        selection: Sequence of selection dicts handed to ``print_selections``
            (which reads their ``"attribute"`` and ``"value"`` keys). When
            non-empty, a summary of it is appended to the chart title.
        source_attribute: Name shown for the source side in the title and
            hover text.
        target_attribute: Name shown for the target side in the title and
            hover text.
        highlight_attribute: Name shown in the title and hover text for the
            highlighted quantity; ``""`` omits the title suffix.
        width: Figure width.
        height: Figure height.
        unit: Label for the counted records, title-cased for the title; may
            be ``""``.
        scheme: Color sequence; only its first entry is used (node color
            and the hue of the link colors).

    Returns:
        A ``go.Figure`` holding a single ``go.Sankey`` trace.
    """
    # Join only the non-empty pieces so that an empty unit does not leave a
    # doubled space in the title.
    title_parts = (
        f"{source_attribute}\u2014{target_attribute} links for all",
        unit.title(),
        "records",
    )
    title = " ".join(part for part in title_parts if part)
    if len(selection) > 0:
        title += ' matching "' + print_selections(selection, multiline=False) + '"'
    if highlight_attribute != "":
        title += f",<br>colored by proportion with {highlight_attribute}"
        # The hover template below uses the name with a trailing space.
        highlight_attribute = highlight_attribute + " "

    source_labels = links_df["Source"].tolist()
    # Target labels get a trailing space so a value that appears on both
    # sides yields two distinct nodes.
    target_labels = [x + " " for x in links_df["Target"].tolist()]
    nodes = sorted(
        set(
            links_df["Source"].unique().tolist()
            + [x + " " for x in links_df["Target"].unique().tolist()]
        )
    )
    # One dict lookup per link instead of a linear list scan per link.
    node_index = {label: position for position, label in enumerate(nodes)}

    default_color = scheme[0]
    # Only the hue of the base color is used; link saturation comes from the
    # proportion and lightness is fixed at 70.
    hue = color_to_hsl(default_color.lstrip("#"))[0]
    fig = go.Figure(
        data=[
            go.Sankey(
                node=dict(
                    pad=15,
                    thickness=20,
                    line=dict(color="black", width=0.5),
                    label=nodes,
                    color=default_color,
                ),
                link=dict(
                    # Indices refer to positions in ``nodes``.
                    source=[node_index[label] for label in source_labels],
                    target=[node_index[label] for label in target_labels],
                    value=links_df["Count"].tolist(),
                    color=[
                        hsl_to_hex(int(hue), int(p * 100), 70)
                        for p in links_df["Proportion"].tolist()
                    ],
                    hovertemplate=source_attribute
                    + ": %{source.label} + "
                    + target_attribute
                    + ": %{target.label} = %{value:.0f}<br>+ "
                    + highlight_attribute
                    + " = %{customdata[0]}<br>Proportion = %{customdata[1]:.1%}<extra></extra>",
                    # Per-link (highlight, proportion) pairs for the hover text.
                    customdata=np.stack(
                        (
                            links_df["Highlight"].tolist(),
                            links_df["Proportion"].tolist(),
                        ),
                        axis=-1,
                    ),
                ),
            )
        ]
    )

    fig.update_layout(font_size=14, width=width, height=height, title_text=title)

    return fig
def print_selections(selection, multiline=True):
    """Render a selection as text, grouped by attribute.

    Values are grouped under their attribute; attributes and the values
    within each attribute are emitted in sorted order, so the result does
    not depend on the order of ``selection``.

    Args:
        selection: Iterable of dicts, each with an ``"attribute"`` and a
            ``"value"`` key. Values are joined as strings.
        multiline: When true, return one ``"- attribute = v1 | v2"`` line per
            attribute, each terminated by a newline. Otherwise return a
            single line of ``"attribute:v1|v2"`` items joined by ``", "``.

    Returns:
        The formatted text; ``""`` for an empty selection.
    """
    grouped = defaultdict(list)
    for item in selection:
        grouped[item["attribute"]].append(item["value"])

    # Sort both levels so the rendering is deterministic.
    ordered = [
        (attribute, sorted(grouped[attribute])) for attribute in sorted(grouped)
    ]
    if multiline:
        return "".join(
            f"- {attribute} = " + " | ".join(values) + "\n"
            for attribute, values in ordered
        )
    return ", ".join(
        f"{attribute}:" + "|".join(values) for attribute, values in ordered
    )