Coverage for tests / unit / no_torch / test_zanj_basic.py: 100%

73 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-02-21 22:18 -0700

from __future__ import annotations

import json
import typing
from pathlib import Path

import numpy as np
import pandas as pd  # type: ignore

from zanj import ZANJ

11 

# Seed numpy's global RNG so the random arrays the tests build are the same
# on every run (for a fixed test execution order).
np.random.seed(0)


# Directory the tests write their .zanj files into; the path is relative, so
# it resolves against the working directory the tests are run from.
TEST_DATA_PATH: Path = Path("tests/junk_data")

16 

17 

def array_meta(x: typing.Any) -> dict:
    """Summarize a value as a small dict of JSON-serializable fields.

    Used by the tests to print a readable description of each saved and
    recovered value.

    Args:
        x: any value.

    Returns:
        For a ``np.ndarray``: ``shape`` (as a list), ``dtype`` (as a string)
        and ``contents`` (``str(x)``).
        For anything else: ``type`` (the type's name) and ``contents``
        (``str(x)``).
    """
    if isinstance(x, np.ndarray):
        return {
            "shape": list(x.shape),
            "dtype": str(x.dtype),
            "contents": str(x),
        }
    return {
        "type": type(x).__name__,
        "contents": str(x),
    }

30 

31 

def test_numpy():
    """Round-trip a dict of one string and three random arrays through ZANJ.

    Saves the dict to ``TEST_DATA_PATH / "test_numpy.zanj"``, reads it back,
    and checks that the keys match, the string is equal, and each array is
    ``np.allclose`` to the original.
    """
    data = {
        "name": "testing zanj",
        "some_array": np.random.rand(128, 128),
        "some_other_array": np.random.rand(16, 64),
        "small_array": np.random.rand(4, 4),
    }
    fname: Path = TEST_DATA_PATH / "test_numpy.zanj"
    z: ZANJ = ZANJ()
    z.save(data, fname)
    recovered_data = z.read(fname)

    # Debug output: the key lists and a per-value summary of both sides.
    print(f"{list(data.keys()) = }")
    print(f"{list(recovered_data.keys()) = }")
    original_vals: dict = {k: array_meta(v) for k, v in data.items()}
    print(json.dumps(original_vals, indent=2))
    recovered_vals: dict = {k: array_meta(v) for k, v in recovered_data.items()}
    print(json.dumps(recovered_vals, indent=2))

    assert sorted(data) == sorted(recovered_data.keys())
    # NOTE: value types are not compared; a type-equality check over all keys
    # existed here but was disabled.

    # One assert per field so a failure names the field that did not survive.
    assert data["name"] == recovered_data["name"], (
        f"assert failed:\n{data['name'] = }\n{recovered_data['name'] = }"
    )
    for key in ("some_array", "some_other_array", "small_array"):
        original = data[key]
        recovered = recovered_data[key]
        assert np.allclose(original, recovered), (
            f"assert failed for {key!r}:\n{original = }\n{recovered = }"
        )

62 

63 

def test_jsonl():
    """Round-trip a dict with two pandas DataFrames and an array through ZANJ.

    The DataFrames are loaded from ``tests/input_data/iris.csv`` and
    ``tests/input_data/brain_networks.csv`` (paths relative to the working
    directory). The dict is saved to ``TEST_DATA_PATH / "test_jsonl.zanj"``,
    read back, and compared field by field.
    """
    data = {
        "name": "testing zanj jsonl",
        "iris_data": pd.read_csv("tests/input_data/iris.csv"),
        "brain_data": pd.read_csv("tests/input_data/brain_networks.csv"),
        "some_array": np.random.rand(128, 128),
    }
    fname: Path = TEST_DATA_PATH / "test_jsonl.zanj"
    z: ZANJ = ZANJ()
    z.save(data, fname)
    recovered_data = z.read(fname)

    assert sorted(data) == sorted(recovered_data.keys())
    # NOTE: value types are not compared; a type-equality check over all keys
    # existed here but was disabled.

    # One assert per field so a failure names the field that did not survive.
    assert data["name"] == recovered_data["name"], "'name' changed in round trip"
    assert np.allclose(data["some_array"], recovered_data["some_array"]), (
        "'some_array' changed in round trip"
    )
    for key in ("iris_data", "brain_data"):
        assert data[key].equals(recovered_data[key]), (
            f"DataFrame {key!r} changed in round trip"
        )

87 

88 

def test_polars_dataframe():
    """Round-trip a dict with a small polars DataFrame and an array through ZANJ.

    The DataFrame has an integer, a string and a float column. The dict is
    saved to ``TEST_DATA_PATH / "test_polars.zanj"``, read back, and compared
    field by field.
    """
    # polars is imported inside the test rather than at module level
    # (presumably so the module still imports without polars -- confirm).
    import polars as pl

    # basic dataframe with various types
    data = {
        "name": "testing zanj polars",
        "df": pl.DataFrame(
            {
                "a": [1, 2, 3],
                "b": ["x", "y", "z"],
                "c": [1.1, 2.2, 3.3],
            }
        ),
        "some_array": np.random.rand(128, 128),
    }
    fname: Path = TEST_DATA_PATH / "test_polars.zanj"
    z: ZANJ = ZANJ()
    z.save(data, fname)
    recovered_data = z.read(fname)

    assert sorted(data) == sorted(recovered_data.keys())

    # One assert per field so a failure names the field that did not survive.
    assert data["name"] == recovered_data["name"], "'name' changed in round trip"
    assert np.allclose(data["some_array"], recovered_data["some_array"]), (
        "'some_array' changed in round trip"
    )
    assert data["df"].equals(recovered_data["df"]), "'df' changed in round trip"

118 

119 

def test_polars_dataframe_empty():
    """Round-trip a polars DataFrame with three columns and no rows through ZANJ.

    Checks that the recovered frame keeps its ``(0, 3)`` shape and its column
    names in order; column dtypes are not checked.
    """
    # polars is imported inside the test rather than at module level
    # (presumably so the module still imports without polars -- confirm).
    import polars as pl

    data = {
        "name": "testing empty polars df",
        "empty_df": pl.DataFrame({"a": [], "b": [], "c": []}),
    }
    fname: Path = TEST_DATA_PATH / "test_polars_empty.zanj"
    z: ZANJ = ZANJ()
    z.save(data, fname)
    recovered_data = z.read(fname)

    assert data["name"] == recovered_data["name"], "'name' changed in round trip"
    recovered_df = recovered_data["empty_df"]
    assert recovered_df.shape == (0, 3), f"unexpected shape: {recovered_df.shape}"
    assert recovered_df.columns == ["a", "b", "c"], (
        f"unexpected columns: {recovered_df.columns}"
    )

136 

137 

def test_polars_dataframe_large():
    """Round-trip a 1000-row polars DataFrame through ZANJ.

    The frame has an int, a float, a string and a bool column. Per the
    original docstring it is meant to be large enough to exercise ZANJ's
    external storage; the size at which that happens is not visible here.
    """
    # polars is imported inside the test rather than at module level
    # (presumably so the module still imports without polars -- confirm).
    import polars as pl

    # create a larger dataframe
    n_rows = 1000
    data = {
        "name": "testing large polars df",
        "large_df": pl.DataFrame(
            {
                "int_col": list(range(n_rows)),
                "float_col": [float(i) * 0.1 for i in range(n_rows)],
                "str_col": [f"row_{i}" for i in range(n_rows)],
                "bool_col": [i % 2 == 0 for i in range(n_rows)],
            }
        ),
    }
    fname: Path = TEST_DATA_PATH / "test_polars_large.zanj"
    z: ZANJ = ZANJ()
    z.save(data, fname)
    recovered_data = z.read(fname)

    assert data["name"] == recovered_data["name"], "'name' changed in round trip"
    assert data["large_df"].equals(recovered_data["large_df"]), (
        "'large_df' changed in round trip"
    )

162 

163 

def test_polars_with_nulls():
    """Round-trip a polars DataFrame containing null values through ZANJ.

    Each of the int, string and float columns holds one ``None``. The
    recovered frame must compare equal to the original via
    ``DataFrame.equals``.
    """
    # polars is imported inside the test rather than at module level
    # (presumably so the module still imports without polars -- confirm).
    import polars as pl

    data = {
        "name": "testing polars with nulls",
        "df_with_nulls": pl.DataFrame(
            {
                "a": [1, None, 3],
                "b": ["x", "y", None],
                "c": [1.1, None, 3.3],
            }
        ),
    }
    fname: Path = TEST_DATA_PATH / "test_polars_nulls.zanj"
    z: ZANJ = ZANJ()
    z.save(data, fname)
    recovered_data = z.read(fname)

    assert data["name"] == recovered_data["name"], "'name' changed in round trip"
    assert data["df_with_nulls"].equals(recovered_data["df_with_nulls"]), (
        "'df_with_nulls' changed in round trip"
    )