Coverage for intelligence_toolkit/tests/unit/helpers/test_df_functions.py: 100%
79 statements
« prev ^ index » next coverage.py v7.10.7, created at 2025-10-16 13:41 -0300
« prev ^ index » next coverage.py v7.10.7, created at 2025-10-16 13:41 -0300
1# Copyright (c) 2024 Microsoft Corporation. All rights reserved.
2# Licensed under the MIT license. See LICENSE file in the project.
3#
4import sys
6import numpy as np
7import pandas as pd
8import pytest
10from intelligence_toolkit.helpers.df_functions import (
11 fix_null_ints,
12 get_current_time,
13 suppress_boolean_binary,
14)
def test_fix_null_ints_with_float_integers():
    """Whole-number floats come back as strings without a decimal part."""
    columns = {
        "col1": [1.0, 2.0, 3.0],
        "col2": ["a", "b", "c"],
    }
    frame = pd.DataFrame(columns)

    converted = fix_null_ints(frame)
    col1 = converted["col1"]

    # The column is expected to hold strings, hence the object dtype.
    assert col1.dtype == "object"
    assert col1.tolist() == ["1", "2", "3"]
def test_fix_null_ints_with_nan_values():
    """A NaN inside a whole-number float column becomes an empty string."""
    columns = {
        "col1": [1.0, np.nan, 3.0],
        "col2": ["a", "b", "c"],
    }
    frame = pd.DataFrame(columns)

    converted = fix_null_ints(frame)
    col1_values = converted["col1"].tolist()

    assert col1_values == ["1", "", "3"]
def test_fix_null_ints_with_mixed_floats():
    """Floats with a fractional part keep that fraction in the output."""
    columns = {
        "col1": [1.5, 2.5, 3.5],
        "col2": ["a", "b", "c"],
    }
    frame = pd.DataFrame(columns)

    converted = fix_null_ints(frame)
    col1_values = converted["col1"].tolist()

    # Non-integer floats must not be truncated to integers.
    assert "1.5" in col1_values
def test_fix_null_ints_does_not_modify_original():
    """Calling fix_null_ints leaves the caller's DataFrame untouched."""
    columns = {
        "col1": [1.0, 2.0, 3.0],
        "col2": ["a", "b", "c"],
    }
    frame = pd.DataFrame(columns)
    # Snapshot taken before the call so any in-place mutation is detectable.
    snapshot = frame.copy()

    fix_null_ints(frame)

    pd.testing.assert_frame_equal(frame, snapshot)
def test_fix_null_ints_empty_dataframe():
    """An empty DataFrame is accepted and yields a result with no rows."""
    empty_frame = pd.DataFrame()

    converted = fix_null_ints(empty_frame)
    row_count = len(converted)

    assert row_count == 0
def test_fix_null_ints_no_float_columns():
    """Integer columns are returned as strings; text columns are unchanged."""
    columns = {
        "col1": [1, 2, 3],
        "col2": ["a", "b", "c"],
    }
    frame = pd.DataFrame(columns)

    converted = fix_null_ints(frame)
    col1_values = converted["col1"].tolist()
    col2_values = converted["col2"].tolist()

    assert col1_values == ["1", "2", "3"]
    assert col2_values == ["a", "b", "c"]
def test_get_current_time():
    """get_current_time returns a 14-character, all-digit string."""
    stamp = get_current_time()

    assert isinstance(stamp, str)
    # 14 characters corresponds to a YYYYMMDDHHMMSS layout.
    assert len(stamp) == 14
    assert stamp.isdigit()
def test_get_current_time_format():
    """Check that get_current_time yields a valid YYYYMMDDHHMMSS timestamp.

    The whole string is parsed rather than range-checking the year, month
    and day separately. That way the hour/minute/second portion is verified
    too, and impossible calendar dates (for example day 31 in a 30-day
    month) are rejected.
    """
    # Local import: keeps the test self-contained without touching the
    # module's import block.
    from datetime import datetime

    layout = "%Y%m%d%H%M%S"
    result = get_current_time()

    # strptime raises ValueError if any field is out of range or if the
    # string has leftover characters.
    parsed = datetime.strptime(result, layout)

    assert 2020 <= parsed.year <= 2100
    # Round-tripping guarantees the canonical zero-padded form was used.
    assert parsed.strftime(layout) == result
def test_suppress_boolean_binary_with_zeros():
    """In a 0/1 integer column, zeros become NaN and ones become "1"."""
    columns = {
        "col1": [0, 1, 0, 1],
        "col2": ["a", "b", "c", "d"],
    }
    frame = pd.DataFrame(columns)

    suppressed = suppress_boolean_binary(frame)
    col1 = suppressed["col1"]

    # Rows 0 and 2 held zeros in the input and must be blanked out.
    for position in (0, 2):
        assert pd.isna(col1.iloc[position])
    assert col1.iloc[1] == "1"
def test_suppress_boolean_binary_with_floats():
    """In a 0.0/1.0 float column, the 0.0 entries become NaN."""
    columns = {
        "col1": [0.0, 1.0, 0.0, 1.0],
        "col2": ["a", "b", "c", "d"],
    }
    frame = pd.DataFrame(columns)

    suppressed = suppress_boolean_binary(frame)
    col1 = suppressed["col1"]

    # Rows 0 and 2 held 0.0 in the input.
    for position in (0, 2):
        assert pd.isna(col1.iloc[position])
def test_suppress_boolean_binary_with_false():
    """In a True/False column, the False entries become NaN."""
    columns = {
        "col1": [False, True, False, True],
        "col2": ["a", "b", "c", "d"],
    }
    frame = pd.DataFrame(columns)

    suppressed = suppress_boolean_binary(frame)
    col1 = suppressed["col1"]

    # Rows 0 and 2 held False in the input.
    for position in (0, 2):
        assert pd.isna(col1.iloc[position])
def test_suppress_boolean_binary_with_three_values_including_nan():
    """A 0/1 column that also contains NaN still has its zeros suppressed."""
    columns = {
        "col1": [0, 1, np.nan, 1],
        "col2": ["a", "b", "c", "d"],
    }
    frame = pd.DataFrame(columns)

    suppressed = suppress_boolean_binary(frame)
    first_value = suppressed["col1"].iloc[0]

    # The pre-existing NaN must not stop the zero in row 0 being blanked.
    assert pd.isna(first_value)
def test_suppress_boolean_binary_preserves_non_binary():
    """A column with more than two distinct values is returned unchanged."""
    columns = {
        "col1": [0, 1, 2, 3],
        "col2": ["a", "b", "c", "d"],
    }
    frame = pd.DataFrame(columns)

    suppressed = suppress_boolean_binary(frame)
    col1_values = suppressed["col1"].tolist()

    # Values stay as integers: no string conversion for non-binary columns.
    assert col1_values == [0, 1, 2, 3]
def test_suppress_boolean_binary_with_output_df():
    """With a separate output frame, zeros in the input blank the output."""
    source_columns = {
        "col1": [0, 1, 0, 1],
        "col2": ["a", "b", "c", "d"],
    }
    target_columns = {
        "col1": ["x", "y", "z", "w"],
        "col2": ["e", "f", "g", "h"],
    }
    input_df = pd.DataFrame(source_columns)
    output_df = pd.DataFrame(target_columns)

    suppressed = suppress_boolean_binary(input_df, output_df)
    first_value = suppressed["col1"].iloc[0]

    # Row 0 is zero in input_df, so the matching cell of the result is NaN.
    assert pd.isna(first_value)
def test_suppress_boolean_binary_does_not_modify_original():
    """Calling suppress_boolean_binary leaves the caller's DataFrame untouched."""
    columns = {
        "col1": [0, 1, 0, 1],
        "col2": ["a", "b", "c", "d"],
    }
    frame = pd.DataFrame(columns)
    # Snapshot taken before the call so any in-place mutation is detectable.
    snapshot = frame.copy()

    suppress_boolean_binary(frame)

    pd.testing.assert_frame_equal(frame, snapshot)