Coverage for intelligence_toolkit/tests/unit/detect_case_patterns/test_model.py: 100%
72 statements
« prev ^ index » next coverage.py v7.10.7, created at 2025-10-16 13:41 -0300
« prev ^ index » next coverage.py v7.10.7, created at 2025-10-16 13:41 -0300
1# Copyright (c) 2024 Microsoft Corporation. All rights reserved.
2# Licensed under the MIT license. See LICENSE file in the project.
3#
5import networkx as nx
6import pandas as pd
8from intelligence_toolkit.detect_case_patterns.config import (
9 min_edge_weight,
10 missing_edge_prop,
11 type_val_sep,
12)
13from intelligence_toolkit.detect_case_patterns.model import (
14 compute_attribute_counts,
15 generate_graph_model,
16 prepare_graph,
17)
def test_generate_graph_model_basic(mocker):
    """Check generate_graph_model output for a small two-subject frame.

    ``fix_null_ints`` is patched to hand back the input frame cast to
    strings. The expected frame lists every subject once per attribute
    column, and its "Full Attribute" column joins attribute type and value
    with ``type_val_sep``.
    """
    source_frame = pd.DataFrame(
        {
            "Subject ID": [1, 2],
            "Period": ["P1", "P2"],
            "Attribute1": [10, 20],
            "Attribute2": [30, 40],
        }
    )
    fix_null_ints_patch = mocker.patch(
        "intelligence_toolkit.helpers.df_functions.fix_null_ints"
    )
    fix_null_ints_patch.return_value = source_frame.astype(str)

    actual = generate_graph_model(source_frame, "Period", type_val_sep)

    wanted = pd.DataFrame(
        {
            "Subject ID": ["1", "2", "1", "2"],
            "Period": ["P1", "P2", "P1", "P2"],
            "Attribute Type": [
                "Attribute1",
                "Attribute1",
                "Attribute2",
                "Attribute2",
            ],
            "Attribute Value": ["10", "20", "30", "40"],
            "Full Attribute": [
                f"Attribute1{type_val_sep}10",
                f"Attribute1{type_val_sep}20",
                f"Attribute2{type_val_sep}30",
                f"Attribute2{type_val_sep}40",
            ],
        }
    )
    pd.testing.assert_frame_equal(actual.reset_index(drop=True), wanted)
def test_generate_graph_model_with_nans(mocker):
    """Check generate_graph_model output when the input contains missing values.

    ``fix_null_ints`` is patched to return the input with missing values
    replaced by empty strings and everything cast to strings. The expected
    frame holds only two rows: Attribute1 for subject "1" and Attribute2 for
    subject "2".
    """
    source_frame = pd.DataFrame(
        {
            "Subject ID": [1, 2, None],
            "Period": ["P1", "P2", None],
            "Attribute1": [10, None, 30],
            "Attribute2": [None, 40, 50],
        }
    )
    fix_null_ints_patch = mocker.patch(
        "intelligence_toolkit.helpers.df_functions.fix_null_ints"
    )
    fix_null_ints_patch.return_value = source_frame.fillna("").astype(str)

    actual = generate_graph_model(source_frame, "Period", type_val_sep)

    # Values read "10.0"/"40.0" — presumably because the None entries make
    # pandas store those columns as floats before the string cast.
    wanted = pd.DataFrame(
        {
            "Subject ID": ["1", "2"],
            "Period": ["P1", "P2"],
            "Attribute Type": ["Attribute1", "Attribute2"],
            "Attribute Value": ["10.0", "40.0"],
            "Full Attribute": [
                f"Attribute1{type_val_sep}10.0",
                f"Attribute2{type_val_sep}40.0",
            ],
        }
    )
    pd.testing.assert_frame_equal(actual.reset_index(drop=True), wanted)
def test_generate_graph_model_column_rename(mocker):
    """Check that a custom period column appears as "Period" in the output.

    The input uses "Custom_Period" as its period column and passes that name
    to ``generate_graph_model``; the expected frame carries the same values
    under the column name "Period".
    """
    source_frame = pd.DataFrame(
        {
            "Subject ID": [1, 2],
            "Custom_Period": ["P1", "P2"],
            "Attribute1": [10, 20],
            "Attribute2": [30, 40],
        }
    )
    fix_null_ints_patch = mocker.patch(
        "intelligence_toolkit.helpers.df_functions.fix_null_ints"
    )
    fix_null_ints_patch.return_value = source_frame.astype(str)

    actual = generate_graph_model(source_frame, "Custom_Period", type_val_sep)

    wanted = pd.DataFrame(
        {
            "Subject ID": ["1", "2", "1", "2"],
            "Period": ["P1", "P2", "P1", "P2"],
            "Attribute Type": [
                "Attribute1",
                "Attribute1",
                "Attribute2",
                "Attribute2",
            ],
            "Attribute Value": ["10", "20", "30", "40"],
            "Full Attribute": [
                f"Attribute1{type_val_sep}10",
                f"Attribute1{type_val_sep}20",
                f"Attribute2{type_val_sep}30",
                f"Attribute2{type_val_sep}40",
            ],
        }
    )
    pd.testing.assert_frame_equal(actual.reset_index(drop=True), wanted)
def test_compute_attribute_counts_basic(mocker):
    """Check attribute counts for a single-attribute pattern in period "P1".

    ``fix_null_ints`` is patched to return the input frame unchanged. The
    pattern is Attribute1 joined to "A" with ``type_val_sep``; the expected
    result counts that pair twice and each Attribute2 value once.
    """
    source_frame = pd.DataFrame(
        {
            "Subject ID": [1, 2, 3],
            "Period": ["P1", "P1", "P2"],
            "Attribute1": ["A", "A", "B"],
            "Attribute2": ["X", "Y", "X"],
        }
    )
    fix_null_ints_patch = mocker.patch(
        "intelligence_toolkit.helpers.df_functions.fix_null_ints"
    )
    fix_null_ints_patch.return_value = source_frame

    pattern = f"Attribute1{type_val_sep}A"
    actual = compute_attribute_counts(
        source_frame, pattern, "Period", "P1", type_val_sep
    )

    wanted = pd.DataFrame(
        {
            "AttributeValue": [
                f"Attribute1{type_val_sep}A",
                f"Attribute2{type_val_sep}X",
                f"Attribute2{type_val_sep}Y",
            ],
            "Count": [2, 1, 1],
        }
    )
    pd.testing.assert_frame_equal(actual.reset_index(drop=True), wanted)
def test_compute_attribute_counts_with_multiple_patterns(mocker):
    """Check attribute counts for a two-part pattern string in period "P1".

    ``fix_null_ints`` is patched to return the input frame unchanged. The
    pattern string is passed verbatim with "::" between type and value; the
    expected counts cover all three "P1" rows of the input.
    """
    source_frame = pd.DataFrame(
        {
            "Subject ID": [1, 2, 3, 4],
            "Period": ["P1", "P1", "P2", "P1"],
            "Attribute1": ["A", "B", "A", "A"],
            "Attribute2": ["X", "X", "Y", "X"],
        }
    )
    fix_null_ints_patch = mocker.patch(
        "intelligence_toolkit.helpers.df_functions.fix_null_ints"
    )
    fix_null_ints_patch.return_value = source_frame

    pattern = "Attribute1::A & Attribute2::X"
    actual = compute_attribute_counts(
        source_frame, pattern, "Period", "P1", type_val_sep
    )

    wanted = pd.DataFrame(
        {
            "AttributeValue": ["Attribute2=X", "Attribute1=A", "Attribute1=B"],
            "Count": [3, 2, 1],
        }
    )
    pd.testing.assert_frame_equal(actual.reset_index(drop=True), wanted)
def test_compute_attribute_counts_with_nans(mocker):
    """Check attribute counts when the input has missing attribute values.

    Missing values are replaced with empty strings before the frame is used,
    and ``fix_null_ints`` is patched to return that frame unchanged. The
    expected result contains one count each for "Attribute1=A" and
    "Attribute2=X" and no entry for the blanked cells.
    """
    raw_frame = pd.DataFrame(
        {
            "Subject ID": [1, 2, 3],
            "Period": ["P1", "P1", "P2"],
            "Attribute1": [None, "A", "A"],
            "Attribute2": ["X", None, "Y"],
        }
    )
    source_frame = raw_frame.fillna("")
    fix_null_ints_patch = mocker.patch(
        "intelligence_toolkit.helpers.df_functions.fix_null_ints"
    )
    fix_null_ints_patch.return_value = source_frame

    pattern = "Attribute1::A"
    actual = compute_attribute_counts(
        source_frame, pattern, "Period", "P1", type_val_sep
    )

    wanted = pd.DataFrame(
        {
            "AttributeValue": ["Attribute1=A", "Attribute2=X"],
            "Count": [1, 1],
        }
    )
    pd.testing.assert_frame_equal(actual.reset_index(drop=True), wanted)
def test_compute_attribute_counts_invalid_pattern(mocker):
    """Check attribute counts when the pattern has no type/value structure.

    ``fix_null_ints`` is patched to return the input frame unchanged. The
    pattern "InvalidPattern" is passed as-is; the expected result counts
    every attribute value found in the two "P1" rows.
    """
    source_frame = pd.DataFrame(
        {
            "Subject ID": [1, 2],
            "Period": ["P1", "P1"],
            "Attribute1": ["A", "A"],
            "Attribute2": ["X", "Y"],
        }
    )
    fix_null_ints_patch = mocker.patch(
        "intelligence_toolkit.helpers.df_functions.fix_null_ints"
    )
    fix_null_ints_patch.return_value = source_frame

    pattern = "InvalidPattern"
    actual = compute_attribute_counts(
        source_frame, pattern, "Period", "P1", type_val_sep
    )

    wanted = pd.DataFrame(
        {
            "AttributeValue": ["Attribute1=A", "Attribute2=X", "Attribute2=Y"],
            "Count": [2, 1, 1],
        }
    )
    pd.testing.assert_frame_equal(actual.reset_index(drop=True), wanted)
def test_prepare_graph(mocker):
    """Check the frame and per-period graphs returned by prepare_graph.

    ``create_edge_df_from_atts`` is patched (at its ``graph_functions``
    path) to return a fixed edge list. The test then asserts that the
    returned frame has a "Grouping ID" column whose values all contain "@",
    and that the returned mapping holds exactly two networkx graphs.
    """
    create_edge_df_from_atts_mock = mocker.patch(
        "intelligence_toolkit.detect_case_patterns.graph_functions.create_edge_df_from_atts"
    )
    edge_df = pd.DataFrame(
        {
            "source": ["A", "B", "C", "A"],
            "target": ["B", "C", "D", "A"],
            "weight": [1, 2, 3, 3],
        }
    )
    create_edge_df_from_atts_mock.return_value = edge_df
    test_df = pd.DataFrame(
        {
            "Subject ID": [1, 2, 2, 1],
            "Period": [2020, 2021, 2021, 2020],
            "Full Attribute": ["ab=1", "bc=2", "ab=2", "bc=1"],
        }
    )

    pdf, time_to_graph = prepare_graph(test_df, min_edge_weight, missing_edge_prop)

    assert "Grouping ID" in pdf.columns
    assert pdf["Grouping ID"].str.contains("@").all()
    # The input has two distinct Period values (2020 and 2021).
    assert len(time_to_graph) == 2
    # Each assertion appears once; the original repeated the next two verbatim.
    assert all(isinstance(graph, nx.Graph) for graph in time_to_graph.values())