Coverage for intelligence_toolkit/tests/unit/detect_case_patterns/test_model.py: 100%

72 statements  

« prev     ^ index     » next       coverage.py v7.10.7, created at 2025-10-16 13:41 -0300

1# Copyright (c) 2024 Microsoft Corporation. All rights reserved. 

2# Licensed under the MIT license. See LICENSE file in the project. 

3# 

4 

5import networkx as nx 

6import pandas as pd 

7 

8from intelligence_toolkit.detect_case_patterns.config import ( 

9 min_edge_weight, 

10 missing_edge_prop, 

11 type_val_sep, 

12) 

13from intelligence_toolkit.detect_case_patterns.model import ( 

14 compute_attribute_counts, 

15 generate_graph_model, 

16 prepare_graph, 

17) 

18 

19 

def test_generate_graph_model_basic(mocker):
    """Check the long-format frame produced for two subjects and two attributes.

    The expected frame holds one row per subject/attribute pair, with every
    value rendered as a string and "Full Attribute" joining type and value
    with ``type_val_sep``.
    """
    source_df = pd.DataFrame(
        {
            "Subject ID": [1, 2],
            "Period": ["P1", "P2"],
            "Attribute1": [10, 20],
            "Attribute2": [30, 40],
        }
    )
    # Patch fix_null_ints so it returns the input frame cast to strings.
    mocker.patch(
        "intelligence_toolkit.helpers.df_functions.fix_null_ints",
        return_value=source_df.astype(str),
    )

    graph_model = generate_graph_model(source_df, "Period", type_val_sep)

    attribute_types = ["Attribute1", "Attribute1", "Attribute2", "Attribute2"]
    attribute_values = ["10", "20", "30", "40"]
    expected_df = pd.DataFrame(
        {
            "Subject ID": ["1", "2", "1", "2"],
            "Period": ["P1", "P2", "P1", "P2"],
            "Attribute Type": attribute_types,
            "Attribute Value": attribute_values,
            "Full Attribute": [
                f"{kind}{type_val_sep}{value}"
                for kind, value in zip(attribute_types, attribute_values)
            ],
        }
    )
    pd.testing.assert_frame_equal(graph_model.reset_index(drop=True), expected_df)

50 

51 

def test_generate_graph_model_with_nans(mocker):
    """Check the result when subject, period and attribute cells are missing.

    Only two rows are expected to survive: Attribute1 for subject 1 and
    Attribute2 for subject 2. Values appear as "10.0"/"40.0" because the
    numeric columns containing None are stringified as floats.
    """
    source_df = pd.DataFrame(
        {
            "Subject ID": [1, 2, None],
            "Period": ["P1", "P2", None],
            "Attribute1": [10, None, 30],
            "Attribute2": [None, 40, 50],
        }
    )
    # Patch fix_null_ints to return the frame with gaps blanked and cast to str.
    mocker.patch(
        "intelligence_toolkit.helpers.df_functions.fix_null_ints",
        return_value=source_df.fillna("").astype(str),
    )

    graph_model = generate_graph_model(source_df, "Period", type_val_sep)

    attribute_types = ["Attribute1", "Attribute2"]
    attribute_values = ["10.0", "40.0"]
    expected_df = pd.DataFrame(
        {
            "Subject ID": ["1", "2"],
            "Period": ["P1", "P2"],
            "Attribute Type": attribute_types,
            "Attribute Value": attribute_values,
            "Full Attribute": [
                f"{kind}{type_val_sep}{value}"
                for kind, value in zip(attribute_types, attribute_values)
            ],
        }
    )
    pd.testing.assert_frame_equal(graph_model.reset_index(drop=True), expected_df)

80 

81 

def test_generate_graph_model_column_rename(mocker):
    """Check that a custom period column comes back under the name "Period".

    The input uses "Custom_Period"; the expected frame is otherwise the same
    as for an input whose period column is already called "Period".
    """
    source_df = pd.DataFrame(
        {
            "Subject ID": [1, 2],
            "Custom_Period": ["P1", "P2"],
            "Attribute1": [10, 20],
            "Attribute2": [30, 40],
        }
    )
    # Patch fix_null_ints so it returns the input frame cast to strings.
    mocker.patch(
        "intelligence_toolkit.helpers.df_functions.fix_null_ints",
        return_value=source_df.astype(str),
    )

    graph_model = generate_graph_model(source_df, "Custom_Period", type_val_sep)

    attribute_types = ["Attribute1", "Attribute1", "Attribute2", "Attribute2"]
    attribute_values = ["10", "20", "30", "40"]
    expected_df = pd.DataFrame(
        {
            "Subject ID": ["1", "2", "1", "2"],
            "Period": ["P1", "P2", "P1", "P2"],
            "Attribute Type": attribute_types,
            "Attribute Value": attribute_values,
            "Full Attribute": [
                f"{kind}{type_val_sep}{value}"
                for kind, value in zip(attribute_types, attribute_values)
            ],
        }
    )
    pd.testing.assert_frame_equal(graph_model.reset_index(drop=True), expected_df)

112 

113 

def test_compute_attribute_counts_basic(mocker):
    """Check per-attribute counts for period "P1" with a single-attribute pattern.

    Two subjects fall in P1: both have Attribute1 "A", and they carry
    Attribute2 "X" and "Y" respectively, hence the expected counts 2, 1, 1.
    """
    records = pd.DataFrame(
        {
            "Subject ID": [1, 2, 3],
            "Period": ["P1", "P1", "P2"],
            "Attribute1": ["A", "A", "B"],
            "Attribute2": ["X", "Y", "X"],
        }
    )
    # Patch fix_null_ints to hand the frame back unchanged.
    mocker.patch(
        "intelligence_toolkit.helpers.df_functions.fix_null_ints",
        return_value=records,
    )

    counts = compute_attribute_counts(
        records,
        f"Attribute1{type_val_sep}A",
        "Period",
        "P1",
        type_val_sep,
    )

    expected_labels = [
        f"{name}{type_val_sep}{value}"
        for name, value in (("Attribute1", "A"), ("Attribute2", "X"), ("Attribute2", "Y"))
    ]
    expected_df = pd.DataFrame({"AttributeValue": expected_labels, "Count": [2, 1, 1]})
    pd.testing.assert_frame_equal(counts.reset_index(drop=True), expected_df)

142 

143 

def test_compute_attribute_counts_with_multiple_patterns(mocker):
    """Check per-attribute counts for period "P1" with a two-attribute pattern.

    Subjects 1, 2 and 4 fall in P1. All three have Attribute2 "X", two have
    Attribute1 "A" and one has Attribute1 "B", hence the expected counts
    3, 2, 1.
    """
    data = {
        "Subject ID": [1, 2, 3, 4],
        "Period": ["P1", "P1", "P2", "P1"],
        "Attribute1": ["A", "B", "A", "A"],
        "Attribute2": ["X", "X", "Y", "X"],
    }
    test_df = pd.DataFrame(data)

    # Patch fix_null_ints to hand the frame back unchanged.
    mocker.patch(
        "intelligence_toolkit.helpers.df_functions.fix_null_ints",
        return_value=test_df,
    )

    # NOTE(review): the pattern is written with "::" rather than type_val_sep,
    # and the expected counts below equal the plain P1 counts (not narrowed to
    # subjects matching both attributes) - confirm this is the intended contract.
    result = compute_attribute_counts(
        test_df, "Attribute1::A & Attribute2::X", "Period", "P1", type_val_sep
    )

    # Build labels from type_val_sep, as test_compute_attribute_counts_basic
    # does, so the expectation does not hard-code the separator.
    expected_df = pd.DataFrame(
        {
            "AttributeValue": [
                f"Attribute2{type_val_sep}X",
                f"Attribute1{type_val_sep}A",
                f"Attribute1{type_val_sep}B",
            ],
            "Count": [3, 2, 1],
        }
    )
    pd.testing.assert_frame_equal(result.reset_index(drop=True), expected_df)

168 

169 

def test_compute_attribute_counts_with_nans(mocker):
    """Check that blank attribute values are left out of the P1 counts.

    Missing cells are replaced by "" before the call. In P1, subject 1 only
    has Attribute2 "X" and subject 2 only has Attribute1 "A", so each label
    is expected once and no blank-valued label is expected.
    """
    data = {
        "Subject ID": [1, 2, 3],
        "Period": ["P1", "P1", "P2"],
        "Attribute1": [None, "A", "A"],
        "Attribute2": ["X", None, "Y"],
    }
    test_df = pd.DataFrame(data).fillna("")

    # Patch fix_null_ints to hand the frame back unchanged.
    mocker.patch(
        "intelligence_toolkit.helpers.df_functions.fix_null_ints",
        return_value=test_df,
    )

    # NOTE(review): the pattern is written with "::" rather than type_val_sep,
    # and the expectation still includes subject 1's Attribute2 value even
    # though that subject has no Attribute1 "A" - confirm this is intended.
    result = compute_attribute_counts(
        test_df, "Attribute1::A", "Period", "P1", type_val_sep
    )

    # Build labels from type_val_sep, as test_compute_attribute_counts_basic
    # does, so the expectation does not hard-code the separator.
    expected_df = pd.DataFrame(
        {
            "AttributeValue": [
                f"Attribute1{type_val_sep}A",
                f"Attribute2{type_val_sep}X",
            ],
            "Count": [1, 1],
        }
    )
    pd.testing.assert_frame_equal(result.reset_index(drop=True), expected_df)

194 

195 

def test_compute_attribute_counts_invalid_pattern(mocker):
    """Check that a pattern with no separator still yields the P1 counts.

    Both subjects are in P1 with Attribute1 "A"; their Attribute2 values are
    "X" and "Y", hence the expected counts 2, 1, 1.
    """
    data = {
        "Subject ID": [1, 2],
        "Period": ["P1", "P1"],
        "Attribute1": ["A", "A"],
        "Attribute2": ["X", "Y"],
    }
    test_df = pd.DataFrame(data)

    # Patch fix_null_ints to hand the frame back unchanged.
    mocker.patch(
        "intelligence_toolkit.helpers.df_functions.fix_null_ints",
        return_value=test_df,
    )

    result = compute_attribute_counts(
        test_df, "InvalidPattern", "Period", "P1", type_val_sep
    )

    # Build labels from type_val_sep, as test_compute_attribute_counts_basic
    # does, so the expectation does not hard-code the separator.
    expected_df = pd.DataFrame(
        {
            "AttributeValue": [
                f"Attribute1{type_val_sep}A",
                f"Attribute2{type_val_sep}X",
                f"Attribute2{type_val_sep}Y",
            ],
            "Count": [2, 1, 1],
        }
    )
    pd.testing.assert_frame_equal(result.reset_index(drop=True), expected_df)

220 

221 

def test_prepare_graph(mocker):
    """Check the frame and per-period graphs returned by prepare_graph.

    The input covers two periods (2020 and 2021), so two graphs are expected.
    The returned frame must carry a "Grouping ID" column whose values all
    contain "@".
    """
    edge_df = pd.DataFrame(
        {
            "source": ["A", "B", "C", "A"],
            "target": ["B", "C", "D", "A"],
            "weight": [1, 2, 3, 3],
        }
    )
    # Patch the edge builder to return a fixed edge list.
    mocker.patch(
        "intelligence_toolkit.detect_case_patterns.graph_functions.create_edge_df_from_atts",
        return_value=edge_df,
    )
    test_df = pd.DataFrame(
        {
            "Subject ID": [1, 2, 2, 1],
            "Period": [2020, 2021, 2021, 2020],
            "Full Attribute": ["ab=1", "bc=2", "ab=2", "bc=1"],
        }
    )

    pdf, time_to_graph = prepare_graph(test_df, min_edge_weight, missing_edge_prop)

    assert "Grouping ID" in pdf.columns
    assert pdf["Grouping ID"].str.contains("@").all()
    # One graph per distinct period; each check is asserted once (the original
    # repeated the two assertions below verbatim).
    assert all(isinstance(graph, nx.Graph) for graph in time_to_graph.values())
    assert len(time_to_graph) == 2