Coverage for intelligence_toolkit/tests/unit/helpers/test_df_functions.py: 100%

79 statements  

« prev     ^ index     » next       coverage.py v7.10.7, created at 2025-10-16 13:41 -0300

1# Copyright (c) 2024 Microsoft Corporation. All rights reserved. 

2# Licensed under the MIT license. See LICENSE file in the project. 

3# 

4import sys 

5 

6import numpy as np 

7import pandas as pd 

8import pytest 

9 

10from intelligence_toolkit.helpers.df_functions import ( 

11 fix_null_ints, 

12 get_current_time, 

13 suppress_boolean_binary, 

14) 

15 

16 

def test_fix_null_ints_with_float_integers():
    """Whole-number floats are expected to come back as integer-looking strings."""
    frame = pd.DataFrame({"col1": [1.0, 2.0, 3.0], "col2": ["a", "b", "c"]})

    fixed_col = fix_null_ints(frame)["col1"]

    # The column should now hold strings, which this test checks via the object dtype.
    assert fixed_col.dtype == "object"
    assert fixed_col.tolist() == ["1", "2", "3"]

27 

28 

def test_fix_null_ints_with_nan_values():
    """A NaN between whole-number floats is expected to become an empty string."""
    source = pd.DataFrame({"col1": [1.0, np.nan, 3.0], "col2": ["a", "b", "c"]})

    converted = fix_null_ints(source)

    expected_values = ["1", "", "3"]
    assert converted["col1"].tolist() == expected_values

38 

39 

def test_fix_null_ints_with_mixed_floats():
    """Fractional floats must keep their decimal part in the output."""
    source = pd.DataFrame({"col1": [1.5, 2.5, 3.5], "col2": ["a", "b", "c"]})

    col1_values = fix_null_ints(source)["col1"].tolist()

    # A truncated "1" would mean the fractional value was wrongly treated as an int.
    assert "1.5" in col1_values

50 

51 

def test_fix_null_ints_does_not_modify_original():
    """Calling fix_null_ints must leave the caller's DataFrame untouched."""
    frame = pd.DataFrame({"col1": [1.0, 2.0, 3.0], "col2": ["a", "b", "c"]})
    # Snapshot taken before the call so any in-place mutation shows up below.
    snapshot = frame.copy()

    fix_null_ints(frame)

    pd.testing.assert_frame_equal(frame, snapshot)

62 

63 

def test_fix_null_ints_empty_dataframe():
    """An empty DataFrame should pass through and still have zero rows."""
    empty_frame = pd.DataFrame()

    processed = fix_null_ints(empty_frame)

    assert len(processed) == 0

69 

70 

def test_fix_null_ints_no_float_columns():
    """Integer columns are expected as strings; text columns stay as they were."""
    source = pd.DataFrame({"col1": [1, 2, 3], "col2": ["a", "b", "c"]})

    converted = fix_null_ints(source)

    numbers_as_text = converted["col1"].tolist()
    letters = converted["col2"].tolist()
    assert numbers_as_text == ["1", "2", "3"]
    assert letters == ["a", "b", "c"]

81 

82 

def test_get_current_time():
    """get_current_time should return a 14-character, all-digit string."""
    stamp = get_current_time()

    assert isinstance(stamp, str)
    # 14 characters corresponds to a YYYYMMDDHHMMSS layout.
    assert len(stamp) == 14
    assert stamp.isdigit()

89 

90 

def test_get_current_time_format():
    """Check that get_current_time yields a real YYYYMMDDHHMMSS timestamp.

    The whole string is parsed as a timestamp instead of range-checking the
    year, month and day slices independently. That also validates the
    hour/minute/second part and rejects impossible dates (e.g. February 31st),
    which the slice-based checks let through.
    """
    # Local import keeps this test self-contained without touching file imports.
    from datetime import datetime

    timestamp_format = "%Y%m%d%H%M%S"
    result = get_current_time()

    # strptime raises ValueError if any field is out of range or the layout differs.
    parsed = datetime.strptime(result, timestamp_format)

    # Round-tripping rules out strings strptime accepts leniently
    # (for example fields that are not zero-padded).
    assert parsed.strftime(timestamp_format) == result
    assert 2020 <= parsed.year <= 2100

102 

103 

def test_suppress_boolean_binary_with_zeros():
    """In a 0/1 integer column, zeros should become NaN and ones the string "1"."""
    frame = pd.DataFrame({"col1": [0, 1, 0, 1], "col2": ["a", "b", "c", "d"]})

    suppressed = suppress_boolean_binary(frame)

    # Rows 0 and 2 held zeros in the input.
    for zero_row in (0, 2):
        assert pd.isna(suppressed["col1"].iloc[zero_row])
    assert suppressed["col1"].iloc[1] == "1"

116 

117 

def test_suppress_boolean_binary_with_floats():
    """Float zeros in a 0.0/1.0 column should be replaced by NaN."""
    frame = pd.DataFrame(
        {"col1": [0.0, 1.0, 0.0, 1.0], "col2": ["a", "b", "c", "d"]}
    )

    col1 = suppress_boolean_binary(frame)["col1"]

    # Rows 0 and 2 held 0.0 in the input.
    for zero_row in (0, 2):
        assert pd.isna(col1.iloc[zero_row])

128 

129 

def test_suppress_boolean_binary_with_false():
    """False entries in a boolean column should be replaced by NaN."""
    frame = pd.DataFrame(
        {"col1": [False, True, False, True], "col2": ["a", "b", "c", "d"]}
    )

    col1 = suppress_boolean_binary(frame)["col1"]

    # Rows 0 and 2 held False in the input.
    for false_row in (0, 2):
        assert pd.isna(col1.iloc[false_row])

140 

141 

def test_suppress_boolean_binary_with_three_values_including_nan():
    """A 0/1 column that also contains NaN should still have its zeros suppressed."""
    frame = pd.DataFrame({"col1": [0, 1, np.nan, 1], "col2": ["a", "b", "c", "d"]})

    suppressed = suppress_boolean_binary(frame)

    # Row 0 held the only zero in the input.
    first_value = suppressed["col1"].iloc[0]
    assert pd.isna(first_value)

152 

153 

def test_suppress_boolean_binary_preserves_non_binary():
    """A column with more than two distinct values should be left as it is."""
    frame = pd.DataFrame({"col1": [0, 1, 2, 3], "col2": ["a", "b", "c", "d"]})

    col1_values = suppress_boolean_binary(frame)["col1"].tolist()

    # Values are compared against numbers: no string conversion, no zero suppression.
    assert col1_values == [0, 1, 2, 3]

164 

165 

def test_suppress_boolean_binary_with_output_df():
    """With a separate output frame, suppression should follow the input's values."""
    binary_input = pd.DataFrame(
        {"col1": [0, 1, 0, 1], "col2": ["a", "b", "c", "d"]}
    )
    target = pd.DataFrame(
        {"col1": ["x", "y", "z", "w"], "col2": ["e", "f", "g", "h"]}
    )

    suppressed = suppress_boolean_binary(binary_input, target)

    # Row 0 is zero in the input, so that row of the result should be NaN
    # even though the output frame held "x" there.
    assert pd.isna(suppressed["col1"].iloc[0])

180 

181 

def test_suppress_boolean_binary_does_not_modify_original():
    """Calling suppress_boolean_binary must leave the caller's DataFrame untouched."""
    frame = pd.DataFrame({"col1": [0, 1, 0, 1], "col2": ["a", "b", "c", "d"]})
    # Snapshot taken before the call so any in-place mutation shows up below.
    snapshot = frame.copy()

    suppress_boolean_binary(frame)

    pd.testing.assert_frame_equal(frame, snapshot)